Source code for kgx.utils.graph_utils

import logging
from typing import List, Set
import networkx as nx
import stringcase
from cachetools import cached

from kgx.mapper import get_prefix
from kgx.utils.kgx_utils import get_toolkit, get_cache, get_curie_lookup_service
from kgx.validator import is_curie

# Module-level registries; both start out empty. None of the functions shown
# in this part of the module read or write them.
# NOTE(review): presumably keyed by ontology prefix -- confirm against callers.
ONTOLOGY_PREFIX_MAP = dict()
ONTOLOGY_GRAPH_CACHE = dict()


def get_parents(graph: nx.MultiDiGraph, node: str, relations: List[str] = None) -> List[str]:
    """
    Return all direct `parents` of a specified node, filtered by ``relations``.

    A parent is the target of an edge going out of ``node``. One entry is
    returned per matching edge, in the order the graph yields its out-edges,
    so a parent reached through several parallel edges appears several times.

    Parameters
    ----------
    graph: networkx.MultiDiGraph
        Graph to traverse
    node: str
        node identifier
    relations: List[str]
        list of relations; when ``None`` every outgoing edge is followed,
        otherwise only edges whose ``edge_label`` is in this list

    Returns
    -------
    List[str]
        A list of parent node(s); empty when ``node`` is not in the graph

    """
    if node not in graph:
        return []

    out_edges = graph.out_edges(node, data=True)
    if relations is None:
        return [target for _, target, _ in out_edges]

    # An edge without an ``edge_label`` cannot match any relation, so it is
    # skipped rather than allowed to raise KeyError.
    return [
        target
        for _, target, data in out_edges
        if 'edge_label' in data and data['edge_label'] in relations
    ]
def get_ancestors(graph: nx.MultiDiGraph, node: str, relations: List[str] = None) -> List[str]:
    """
    Return all `ancestors` of specified node, filtered by ``relations``.

    The graph is walked depth-first through ``get_parents``; each ancestor is
    reported once, in the order it is first visited. ``node`` itself is never
    part of the result, even when it is reachable from itself via a cycle.

    Parameters
    ----------
    graph: networkx.MultiDiGraph
        Graph to traverse
    node: str
        node identifier
    relations: List[str]
        list of relations

    Returns
    -------
    List[str]
        A list of ancestor nodes

    """
    visited = set()   # O(1) membership test
    ordered = []      # preserves first-visit order for the result
    pending = [node]
    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)
        ordered.append(current)
        pending.extend(get_parents(graph, current, relations=relations))

    # The start node is always the first one visited; drop it from the result.
    return ordered[1:]
@cached(get_cache())
def get_category_via_superclass(graph: nx.MultiDiGraph, curie: str, load_ontology: bool = True) -> Set[str]:
    """
    Derive categories for a CURIE by walking up its ``subclass_of`` hierarchy.

    Ancestors are examined in the order returned by ``get_ancestors``. At the
    first ancestor the toolkit can map, the ``name`` of every ancestor examined
    so far (where present on the graph node) and the toolkit's ancestors of
    that mapping are collected, and the search stops.

    Parameters
    ----------
    graph: networkx.MultiDiGraph
        Graph to traverse
    curie: str
        Input CURIE
    load_ontology: bool
        When the CURIE has no ancestors in ``graph`` and this flag is set, the
        lookup is repeated once against the CURIE lookup service's ontology
        graph (with this flag turned off)

    Returns
    -------
    Set[str]
        A set containing one (or more) category for the given CURIE; empty
        when the input is not a CURIE or nothing could be mapped

    """
    logging.debug("curie: {}".format(curie))
    categories = []
    toolkit = get_toolkit()

    if not is_curie(curie):
        return set(categories)

    ancestors = get_ancestors(graph, curie, relations=['subclass_of'])
    if load_ontology and not ancestors:
        # Nothing known locally: fall back to the preloaded ontology graph.
        lookup_service = get_curie_lookup_service()
        ontology_graph = lookup_service.ontology_graph
        categories.extend(get_category_via_superclass(ontology_graph, curie, False))

    logging.debug("Ancestors for CURIE {} via subClassOf: {}".format(curie, ancestors))

    visited = []
    for ancestor in ancestors:
        mapping = toolkit.get_by_mapping(ancestor)
        visited.append(ancestor)
        if not mapping:
            continue

        # there is direct mapping to BioLink Model
        logging.debug("Ancestor {} mapped to {}".format(ancestor, mapping))
        for candidate in visited:
            attributes = graph.nodes[candidate]
            if 'name' in attributes:
                categories.append(attributes['name'])
        categories.extend(toolkit.ancestors(mapping))
        break

    return set(categories)
def curie_lookup(curie: str) -> str:
    """
    Given a CURIE, find its label.

    This method first does a lookup in predefined maps. If none found,
    it makes use of CurieLookupService to look for the CURIE in a set
    of preloaded ontologies.

    Parameters
    ----------
    curie: str
        A CURIE

    Returns
    -------
    str
        The label corresponding to the given CURIE, or ``None`` when no
        label could be found

    """
    cls = get_curie_lookup_service()
    name = None
    prefix = get_prefix(curie)
    if prefix in ('OIO', 'OWL', 'owl', 'OBO', 'rdfs'):
        # For these prefixes the label is the snake-cased local part.
        name = stringcase.snakecase(curie.split(':', 1)[1])
    elif curie in cls.curie_map:
        name = cls.curie_map[curie]
    elif curie in cls.ontology_graph:
        # The original read from an undefined name ``g`` here (NameError);
        # the node lives in the lookup service's ontology graph. A node
        # without a ``name`` attribute yields ``None``.
        name = cls.ontology_graph.nodes[curie].get('name')
    return name