Source code for bioontologies.robot

# -*- coding: utf-8 -*-

"""A wrapper around ROBOT functionality.

.. seealso:: https://robot.obolibrary.org
"""

import dataclasses
import json
import logging
import os
import subprocess
import tempfile
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from subprocess import check_output
from typing import List, Optional, Union

import bioregistry
import pystow
import requests
from pystow.utils import download, name_from_url
from typing_extensions import Literal

from .obograph import Graph, GraphDocument

# Names exported by a star-import of this module. Helpers defined in this file
# that are not listed here (e.g. ``correct_raw_json``, ``write_getter_warnings``)
# are only reachable through an explicit import.
__all__ = [
    "is_available",
    "ParseResults",
    # Conversions
    "convert",
    "convert_to_obograph_local",
    "convert_to_obograph_remote",
    "convert_to_obograph",
    # Processors
    "get_obograph_by_prefix",
    "get_obograph_by_iri",
    "get_obograph_by_path",
]

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

#: The ROBOT release whose JAR is fetched (interpolated into :data:`ROBOT_URL`)
LATEST = "1.9.4"
#: Download location of the ROBOT JAR for the pinned release
ROBOT_URL = f"https://github.com/ontodev/robot/releases/download/v{LATEST}/robot.jar"
#: The PyStow module named "robot"
ROBOT_MODULE = pystow.module("robot")
# NOTE(review): this call runs at import time; presumably it downloads the JAR
# on first use and returns the cached local path afterwards - confirm against
# the pystow documentation for ``Module.ensure``.
ROBOT_PATH = ROBOT_MODULE.ensure(url=ROBOT_URL)
#: The argument-vector prefix that every ROBOT invocation in this module starts with
ROBOT_COMMAND = ["java", "-jar", str(ROBOT_PATH)]


def is_available() -> bool:
    """Check if ROBOT is available.

    Four conditions are verified in order, and the first one that fails is
    logged as an error before ``False`` is returned:

    1. a ``java`` executable is on the ``PATH``
    2. ``java --help`` can be run
    3. the ROBOT JAR exists as a file at ``ROBOT_PATH``
    4. ROBOT itself can be run with ``--help``

    :returns: ``True`` only if all four checks pass
    """
    import shutil

    def _runs(command) -> bool:
        """Return whether the command could be run without raising."""
        try:
            check_output(command)  # noqa:S603,S607
        except Exception:
            return False
        return True

    # PATH lookup as suggested in
    # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
    java_executable = shutil.which("java")
    if java_executable is None:
        logger.error("java is not on the PATH")
        return False

    if not _runs(["java", "--help"]):
        logger.error(
            "java --help failed - this means the java runtime environment (JRE) "
            "might not be configured properly"
        )
        return False

    # The JAR is expected at ROBOT_PATH; a missing file means it was not downloaded
    if not ROBOT_PATH.is_file():
        logger.error("ROBOT was not successfully downloaded to %s", ROBOT_PATH)
        return False

    # Finally, make sure the JAR itself can be executed
    if not _runs([*ROBOT_COMMAND, "--help"]):
        logger.error("ROBOT was downloaded to %s but could not be run with --help", ROBOT_PATH)
        return False

    return True
@dataclass
class ParseResults:
    """A dataclass containing an OBO Graph JSON and text output from ROBOT."""

    #: The parsed graph document, or ``None`` when no document could be parsed
    graph_document: Optional[GraphDocument]
    #: Text messages collected while obtaining the document (e.g. ROBOT output lines)
    messages: List[str] = dataclasses.field(default_factory=list)
    #: The IRI the document was retrieved from, if known
    iri: Optional[str] = None

    def squeeze(self, standardize: bool = False) -> Graph:
        """Get the first graph.

        :param standardize: If true, return the result of calling ``standardize()``
            on the first graph instead of the graph itself
        :returns: The first graph in the graph document
        :raises ValueError: if there is no graph document
        """
        if self.graph_document is None:
            raise ValueError(f"graph document was not successfully parsed: {self.messages}")
        rv = self.graph_document.graphs[0]
        if standardize:
            rv = rv.standardize()
        return rv

    def guess(self, prefix: str) -> Graph:
        """Guess the right graph.

        :param prefix: The prefix handed to the graph document's ``guess()`` method
        :returns: The graph selected by the graph document
        :raises ValueError: if there is no graph document
        """
        if self.graph_document is None:
            raise ValueError("no graph document")
        return self.graph_document.guess(prefix)

    def guess_version(self, prefix: str) -> Optional[str]:
        """Guess the version.

        :param prefix: The prefix used to pick the graph, see :meth:`guess`
        :returns: The graph's ``version`` if it is truthy, otherwise its ``version_iri``.
            ``None`` if guessing the graph raised a :class:`ValueError`.
        """
        try:
            graph = self.guess(prefix)
        except ValueError:
            return None
        else:
            return graph.version or graph.version_iri

    def write(self, path: Union[str, Path]) -> None:
        """Write the graph document to a file in JSON.

        :param path: The file path to write to
        :raises ValueError: if there is no graph document to write
        """
        if not self.graph_document:
            # Give the caller something actionable instead of an empty ValueError
            raise ValueError(f"no graph document to write: {self.messages}")
        path = Path(path)
        path.write_text(
            self.graph_document.json(
                indent=2, sort_keys=True, exclude_unset=True, exclude_none=True
            )
        )
def get_obograph_by_iri(
    iri: str,
) -> ParseResults:
    """Get an ontology by its OBO Graph JSON iri.

    :param iri: The IRI of a remote OBO Graph JSON document
    :returns: Parse results holding the parsed graph document and the IRI it came from
    :raises requests.HTTPError: if the server answers with an error status code
    """
    response = requests.get(iri)
    # Stop on HTTP errors before trying to interpret an error page as a graph
    # document. HTTPError derives from IOError, like the other requests errors.
    response.raise_for_status()
    res_json = response.json()
    correct_raw_json(res_json)
    graph_document = GraphDocument.parse_obj(res_json)
    return ParseResults(graph_document=graph_document, iri=iri)
def get_obograph_by_path(path: Union[str, Path], *, iri: Optional[str] = None) -> ParseResults:
    """Get an ontology by its OBO Graph JSON file path.

    :param path: The path to a local OBO Graph JSON file
    :param iri: The IRI to store in the results. If not given and the document
        contains exactly one graph, that graph's ``id`` is used.
    :returns: Parse results holding the parsed graph document
    """
    # Read bytes rather than text: json.loads() detects UTF-8/16/32 on its own,
    # so the result does not depend on the platform's locale encoding.
    res_json = json.loads(Path(path).resolve().read_bytes())
    correct_raw_json(res_json)
    graph_document = GraphDocument.parse_obj(res_json)
    if iri is None:
        if graph_document.graphs and len(graph_document.graphs) == 1:
            iri = graph_document.graphs[0].id
    return ParseResults(graph_document=graph_document, iri=iri)
# Module-level accumulator of the failure messages recorded by
# ``get_obograph_by_prefix``; ``write_getter_warnings`` dumps it to a file.
GETTER_MESSAGES: List[str] = []
def get_obograph_by_prefix(
    prefix: str,
    *,
    json_path: Union[None, str, Path] = None,
    cache: bool = False,
    check: bool = True,
    reason: bool = True,
) -> ParseResults:
    """Get an ontology by its Bioregistry prefix.

    The downloads registered in the Bioregistry are tried in order: first the
    OBO Graph JSON download, then the OWL download, then the OBO download. The
    first one that can be parsed is returned. Every failure is recorded in the
    returned messages, in ``GETTER_MESSAGES``, and in the log.

    :param prefix: A canonical Bioregistry prefix
    :param json_path: The optional path to store the intermediate OBO Graph JSON
        file generated by ROBOT when converting from OWL or OBO
    :param cache: If true, OWL/OBO files are first downloaded into a temporary
        directory and converted from the local copy. Otherwise, ROBOT is given
        the remote IRI directly.
    :param check: Passed through to the ROBOT conversion
    :param reason: Turn on ontology reasoning in the ROBOT conversion
    :returns: The parse results. If no source could be parsed, the results have
        no graph document and only contain the collected messages.
    :raises ValueError: if the prefix is not a canonical Bioregistry prefix
    """
    if prefix != bioregistry.normalize_prefix(prefix):
        raise ValueError(f"this function requires bioregistry canonical prefixes: {prefix}")

    messages: List[str] = []

    def _record(msg: str) -> None:
        """Keep a failure message in the local list, the global list, and the log."""
        messages.append(msg)
        GETTER_MESSAGES.append(msg)
        logger.warning(msg)

    json_iri = bioregistry.get_json_download(prefix)
    if json_iri is not None:
        try:
            parse_results = get_obograph_by_iri(json_iri)
        except (IOError, ValueError, TypeError) as e:
            _record(f"[{prefix}] could not parse JSON from {json_iri}: {e}")
        else:
            return parse_results

    owl_iri = bioregistry.get_owl_download(prefix)
    obo_iri = bioregistry.get_obo_download(prefix)

    for label, iri in [("OWL", owl_iri), ("OBO", obo_iri)]:
        if iri is None:
            continue

        try:
            if cache:
                with tempfile.TemporaryDirectory() as d:
                    path = os.path.join(d, name_from_url(iri))
                    download(iri, path=path)
                    # Call the generic converter directly so that ``reason`` is
                    # honored for cached files too; the local wrapper used here
                    # previously does not forward it, so reasoning always ran.
                    parse_results = convert_to_obograph(
                        input_path=path,
                        input_flag="-i",
                        json_path=json_path,
                        from_iri=iri,
                        check=check,
                        reason=reason,
                    )
            else:
                parse_results = convert_to_obograph_remote(
                    iri, json_path=json_path, check=check, reason=reason
                )
        except (subprocess.CalledProcessError, KeyError):
            _record(f"[{prefix}] could not parse {label} from {iri}")
            continue
        else:
            # stick all messages before
            parse_results.messages = [*messages, *parse_results.messages]
            return parse_results

    return ParseResults(graph_document=None, messages=messages)
def convert_to_obograph_local(
    path: Union[str, Path],
    *,
    json_path: Union[None, str, Path] = None,
    from_iri: Optional[str] = None,
    check: bool = True,
    reason: bool = True,
) -> ParseResults:
    """Convert a local OWL/OBO file to an OBO Graph JSON object.

    :param path: The path to a local OWL or OBO file
    :param json_path: The optional path to store the intermediate
        OBO Graph JSON file generated by ROBOT. If not given, the
        OBO Graph JSON file will be put in a temporary directory
        and deleted after the function finishes.
    :param from_iri: Use this parameter to say what IRI the graph came from
    :param check:
        By default, the OBO writer strictly enforces
        `document structure rules <http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4>`_.
        If an ontology violates these, the convert to OBO operation will fail.
        These checks can be ignored by setting this to false.
    :param reason: Turn on ontology reasoning. Defaults to true, which is what
        this function always did before the parameter was exposed.
    :returns: An object with the parsed OBO Graph JSON and text
        output from the ROBOT conversion program
    """
    return convert_to_obograph(
        input_path=path,
        input_flag="-i",
        json_path=json_path,
        from_iri=from_iri,
        check=check,
        reason=reason,
    )
def convert_to_obograph_remote(
    iri: str,
    *,
    json_path: Union[None, str, Path] = None,
    check: bool = True,
    reason: bool = True,
) -> ParseResults:
    """Convert a remote OWL/OBO file to an OBO Graph JSON object.

    This delegates to :func:`convert_to_obograph` with the ``-I`` input flag and
    marks the input as an IRI.

    :param iri: The IRI for a remote OWL or OBO file
    :param json_path: The optional path to store the intermediate
        OBO Graph JSON file generated by ROBOT. If not given, the
        OBO Graph JSON file will be put in a temporary directory
        and deleted after the function finishes.
    :param check:
        By default, the OBO writer strictly enforces
        `document structure rules <http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4>`_.
        If an ontology violates these, the convert to OBO operation will fail.
        These checks can be ignored by setting this to false.
    :param reason: Turn on ontology reasoning
    :returns: An object with the parsed OBO Graph JSON and text
        output from the ROBOT conversion program
    """
    remote_options = {
        "input_flag": "-I",
        "json_path": json_path,
        "input_is_iri": True,
        "check": check,
        "reason": reason,
    }
    return convert_to_obograph(input_path=iri, **remote_options)
def convert_to_obograph(
    input_path: Union[str, Path],
    *,
    input_flag: Optional[Literal["-i", "-I"]] = None,
    json_path: Union[None, str, Path] = None,
    input_is_iri: bool = False,
    extra_args: Optional[List[str]] = None,
    from_iri: Optional[str] = None,
    merge: bool = True,
    check: bool = True,
    reason: bool = True,
) -> ParseResults:
    """Convert a local or remote OWL/OBO file to an OBO Graph JSON object.

    :param input_path: Either a local file path or IRI. If a local file path is used, pass ``"-i"``
        to ``input_flag``. If an IRI is used, pass ``"-I"`` to ``input_flag``.
    :param input_flag: The flag to denote if the file is local or remote.
        Tries to infer from input string if none is given
    :param json_path: The optional path to store the intermediate
        OBO Graph JSON file generated by ROBOT. If not given, the
        OBO Graph JSON file will be put in a temporary directory
        and deleted after the function finishes.
    :param input_is_iri:
        Should the ``input_path`` variable be considered as an IRI that
        gets stored in the returned parse results?
    :param extra_args: Extra positional arguments to pass in the command line
    :param from_iri: Use this parameter to say what IRI the graph came from
    :param merge: Use ROBOT's merge command to squash all graphs together
    :param check:
        By default, the OBO writer strictly enforces
        `document structure rules <http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4>`_.
        If an ontology violates these, the convert to OBO operation will fail.
        These checks can be ignored by setting this to false.
    :param reason: Turn on ontology reasoning
    :returns: An object with the parsed OBO Graph JSON and text
        output from the ROBOT conversion program
    :raises ValueError: if a graph is missing an ID, or if ``from_iri`` is given
        together with ``input_is_iri``
    :raises TypeError: if ``input_is_iri`` is marked as true but a path object is given
        for the ``input_path``
    """
    if input_is_iri and not isinstance(input_path, str):
        raise TypeError(
            "input_path must be a string when input_is_iri is true, "
            f"got {type(input_path).__name__}"
        )
    if input_is_iri and from_iri is not None:
        raise ValueError("can't specifiy from_iri when input is IRI")

    with _path_context(json_path) as path:
        ret = convert(
            input_path=input_path,
            input_flag=input_flag,
            output_path=path,
            fmt="json",
            extra_args=extra_args,
            merge=merge,
            check=check,
            reason=reason,
        )
        messages = ret.strip().splitlines()
        # Read bytes rather than text: json.loads() detects the JSON encoding
        # itself, independent of the platform's locale encoding. The file must
        # be read inside the context, since it may live in a temporary directory.
        graph_document_raw = json.loads(path.read_bytes())

    graphs_raw = graph_document_raw["graphs"]
    if len(graphs_raw) == 1 and "id" not in graphs_raw[0]:
        # A lone graph without an ID can be repaired if we know where it came from
        if input_is_iri:
            logger.warning(
                "%s has a single graph, missing an ID. assigning with IRI", input_path
            )
            graphs_raw[0]["id"] = input_path
        elif from_iri is not None:
            logger.warning(
                "%s has a single graph, missing an ID. assigning with IRI: %s",
                input_path,
                from_iri,
            )
            graphs_raw[0]["id"] = from_iri
        else:
            raise ValueError(f"{input_path} only graph is missing id")
    else:
        # With several graphs there is no way to tell which ID belongs where
        missing = [i for i, graph in enumerate(graphs_raw) if "id" not in graph]
        if missing:
            raise ValueError(f"{input_path} graphs missing IDs: {missing}")

    correct_raw_json(graph_document_raw)
    graph_document = GraphDocument.parse_obj(graph_document_raw)
    return ParseResults(
        graph_document=graph_document,
        messages=messages,
        iri=input_path if input_is_iri else None,  # type:ignore
    )
def correct_raw_json(graph_document_raw: dict) -> dict:
    """Correct issues in raw graph documents, in place.

    For every graph, the metadata of the graph and of each of its nodes is
    cleaned with :func:`_clean_raw_meta`, and nodes without a ``type`` key are
    dropped. Graphs that have no ``nodes`` entry are left without one.

    :param graph_document_raw: A raw graph document, i.e., a dictionary with a ``graphs`` list
    :returns: The same dictionary that was passed in, after it was modified in place
    """
    for graph in graph_document_raw["graphs"]:
        _clean_raw_meta(graph)
        nodes = graph.get("nodes")
        if nodes is None:
            # Nothing to clean, and no reason to fail on a graph without nodes
            continue
        for node in nodes:
            _clean_raw_meta(node)
        graph["nodes"] = [node for node in nodes if "type" in node]
    return graph_document_raw


def _clean_raw_meta(element) -> None:
    """Remove empty entries from the ``meta`` dictionary of a graph or node, in place."""
    meta = element.get("meta")
    if not meta:
        return

    # Property values are only useful if they have both a predicate and a value
    basic_property_values = meta.get("basicPropertyValues")
    if basic_property_values:
        meta["basicPropertyValues"] = [
            basic_property_value
            for basic_property_value in basic_property_values
            if basic_property_value.get("pred") and basic_property_value.get("val")
        ]

    # A definition without a value is removed entirely
    definition = meta.get("definition")
    if definition is not None and not definition.get("val"):
        del meta["definition"]

    # Xrefs and synonyms are both lists of entries that are pointless without a value
    for key in ("xrefs", "synonyms"):
        entries = meta.get(key)
        if entries:
            meta[key] = [entry for entry in entries if entry.get("val")]


#: Prefixes that denote remote resources
PROTOCOLS = {
    "https://",
    "http://",
    "ftp://",
    "ftps://",
}


def _is_remote(url: Union[str, Path]) -> bool:
    """Check if the input is a string that starts with one of the remote protocols."""
    # str.startswith() accepts a tuple of prefixes, which checks all of them in one call
    return isinstance(url, str) and url.startswith(tuple(PROTOCOLS))


@contextmanager
def _path_context(path: Union[None, str, Path], name: str = "output.json"):
    """Yield a path to write to.

    :param path: If given, this path is resolved and yielded as is.
    :param name: The file name to use inside a temporary directory when no path is given.
        The temporary directory is removed when the context exits.
    :yields: A :class:`pathlib.Path`
    """
    if path is not None:
        yield Path(path).resolve()
    else:
        with tempfile.TemporaryDirectory() as directory:
            yield Path(directory).joinpath(name)
def convert(
    input_path: Union[str, Path],
    output_path: Union[str, Path],
    input_flag: Optional[Literal["-i", "-I"]] = None,
    *,
    merge: bool = True,
    fmt: Optional[str] = None,
    check: bool = True,
    reason: bool = False,
    extra_args: Optional[List[str]] = None,
) -> str:
    """Convert an ontology file to another format with ROBOT.

    Depending on ``merge`` and ``reason``, one of the following ROBOT command
    chains is run: ``merge ... convert``, ``merge ... reason convert``,
    ``reason ... convert``, or just ``convert ...``.

    :param input_path: Either a local file path or IRI. If a local file path is used, pass ``"-i"``
        to ``input_flag``. If an IRI is used, pass ``"-I"`` to ``input_flag``.
    :param output_path: The local file path to save the converted ontology to.
        Will infer format from the extension, otherwise, use the ``fmt`` param.
    :param input_flag: The flag to denote if the file is local or remote.
        Tries to infer from input string if none is given
    :param merge: Use ROBOT's merge command to squash all graphs together
    :param fmt: Explicitly set the format
    :param check:
        By default, the OBO writer strictly enforces
        `document structure rules <http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4>`_.
        If an ontology violates these, the convert to OBO operation will fail.
        These checks can be ignored by setting this to false.
    :param reason: Turn on ontology reasoning
    :param extra_args: Extra positional arguments to pass in the command line
    :return: Output from standard out from running ROBOT
    :raises subprocess.CalledProcessError: if ROBOT exits with a non-zero status
    """
    if input_flag is None:
        input_flag = "-I" if _is_remote(input_path) else "-i"

    # ROBOT is run with this package's directory as its working directory (see
    # below), so relative local paths have to be made absolute here. Otherwise,
    # they would be looked up relative to the package instead of relative to the
    # caller's working directory.
    if input_flag == "-i":
        input_path = Path(input_path).resolve()
    output_path = Path(output_path).resolve()

    source = [input_flag, str(input_path)]
    args = list(ROBOT_COMMAND)
    if merge:
        # the input goes to the first command in the chain
        args.extend(["merge", *source])
        if reason:
            args.append("reason")
        args.append("convert")
    elif reason:
        args.extend(["reason", *source, "convert"])
    else:
        args.extend(["convert", *source])

    args.extend(("-o", str(output_path)))
    if extra_args:
        args.extend(extra_args)
    if not check:
        args.append("--check=false")
    if fmt:
        args.extend(("--format", fmt))
    logger.debug("Running shell command: %s", args)
    ret = check_output(  # noqa:S603
        args,
        cwd=os.path.dirname(__file__),
    )
    return ret.decode()
def write_getter_warnings(path: Union[str, Path]) -> None:
    """Write the messages collected in ``GETTER_MESSAGES`` to a file, one per line.

    :param path: The file path to write to
    """
    destination = Path(path).resolve()
    text = "\n".join(GETTER_MESSAGES)
    destination.write_text(text)