# -*- coding: utf-8 -*-
"""A wrapper around ROBOT functionality.
.. seealso:: https://robot.obolibrary.org
"""
import dataclasses
import json
import logging
import os
import subprocess
import tempfile
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from subprocess import check_output
from typing import List, Optional, Union
import bioregistry
import pystow
import requests
from pystow.utils import download, name_from_url
from typing_extensions import Literal
from .obograph import Graph, GraphDocument
__all__ = [
"is_available",
"ParseResults",
# Conversions
"convert",
"convert_to_obograph_local",
"convert_to_obograph_remote",
"convert_to_obograph",
# Processors
"get_obograph_by_prefix",
"get_obograph_by_iri",
"get_obograph_by_path",
]
logger = logging.getLogger(__name__)
LATEST = "1.9.4"
ROBOT_URL = f"https://github.com/ontodev/robot/releases/download/v{LATEST}/robot.jar"
ROBOT_MODULE = pystow.module("robot")
ROBOT_PATH = ROBOT_MODULE.ensure(url=ROBOT_URL)
ROBOT_COMMAND = ["java", "-jar", str(ROBOT_PATH)]
[docs]
def is_available() -> bool:
"""Check if ROBOT is available."""
from shutil import which
if which("java") is None:
# suggested in https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
logger.error("java is not on the PATH")
return False
try:
check_output(["java", "--help"]) # noqa:S607
except Exception:
logger.error(
"java --help failed - this means the java runtime environment (JRE) "
"might not be configured properly"
)
return False
if not ROBOT_PATH.is_file():
logger.error("ROBOT was not successfully downloaded to %s", ROBOT_PATH)
# ROBOT was unsuccessfully downloaded
return False
try:
# Check
check_output([*ROBOT_COMMAND, "--help"])
except Exception:
logger.error("ROBOT was downloaded to %s but could not be run with --help", ROBOT_PATH)
return False
return True
[docs]
@dataclass
class ParseResults:
"""A dataclass containing an OBO Graph JSON and text output from ROBOT."""
graph_document: Optional[GraphDocument]
messages: List[str] = dataclasses.field(default_factory=list)
iri: Optional[str] = None
[docs]
def squeeze(self, standardize: bool = False) -> Graph:
"""Get the first graph."""
if self.graph_document is None:
raise ValueError(f"graph document was not successfully parsed: {self.messages}")
rv = self.graph_document.graphs[0]
if standardize:
rv = rv.standardize()
return rv
[docs]
def guess(self, prefix: str) -> Graph:
"""Guess the right graph."""
if self.graph_document is None:
raise ValueError("no graph document")
return self.graph_document.guess(prefix)
[docs]
def guess_version(self, prefix: str) -> Optional[str]:
"""Guess the version."""
try:
graph = self.guess(prefix)
except ValueError:
return None
else:
return graph.version or graph.version_iri
[docs]
def write(self, path: Union[str, Path]) -> None:
"""Write the graph document to a file in JSON."""
if not self.graph_document:
raise ValueError
path = Path(path)
path.write_text(
self.graph_document.json(
indent=2, sort_keys=True, exclude_unset=True, exclude_none=True
)
)
[docs]
def get_obograph_by_iri(
iri: str,
) -> ParseResults:
"""Get an ontology by its OBO Graph JSON iri."""
res_json = requests.get(iri).json()
correct_raw_json(res_json)
graph_document = GraphDocument.parse_obj(res_json)
return ParseResults(graph_document=graph_document, iri=iri)
[docs]
def get_obograph_by_path(path: Union[str, Path], *, iri: Optional[str] = None) -> ParseResults:
"""Get an ontology by its OBO Graph JSON file path."""
res_json = json.loads(Path(path).resolve().read_text())
correct_raw_json(res_json)
graph_document = GraphDocument.parse_obj(res_json)
if iri is None:
if graph_document.graphs and len(graph_document.graphs) == 1:
iri = graph_document.graphs[0].id
return ParseResults(graph_document=graph_document, iri=iri)
GETTER_MESSAGES = []
[docs]
def get_obograph_by_prefix(
prefix: str,
*,
json_path: Union[None, str, Path] = None,
cache: bool = False,
check: bool = True,
reason: bool = True,
) -> ParseResults:
"""Get an ontology by its Bioregistry prefix."""
if prefix != bioregistry.normalize_prefix(prefix):
raise ValueError(f"this function requires bioregistry canonical prefixes: {prefix}")
messages = []
json_iri = bioregistry.get_json_download(prefix)
if json_iri is not None:
try:
parse_results = get_obograph_by_iri(json_iri)
except (IOError, ValueError, TypeError) as e:
msg = f"[{prefix}] could not parse JSON from {json_iri}: {e}"
messages.append(msg)
GETTER_MESSAGES.append(msg)
logger.warning(msg)
else:
return parse_results
owl_iri = bioregistry.get_owl_download(prefix)
obo_iri = bioregistry.get_obo_download(prefix)
for label, iri in [("OWL", owl_iri), ("OBO", obo_iri)]:
if iri is None:
continue
try:
if cache:
with tempfile.TemporaryDirectory() as d:
path = os.path.join(d, name_from_url(iri))
download(iri, path=path)
parse_results = convert_to_obograph_local(
path, json_path=json_path, from_iri=iri, check=check
)
else:
parse_results = convert_to_obograph_remote(
iri, json_path=json_path, check=check, reason=reason
)
except (subprocess.CalledProcessError, KeyError):
msg = f"[{prefix}] could not parse {label} from {iri}"
messages.append(msg)
GETTER_MESSAGES.append(msg)
logger.warning(msg)
continue
else:
# stick all messages before
parse_results.messages = [*messages, *parse_results.messages]
return parse_results
return ParseResults(graph_document=None, messages=messages)
[docs]
def convert_to_obograph_local(
path: Union[str, Path],
*,
json_path: Union[None, str, Path] = None,
from_iri: Optional[str] = None,
check: bool = True,
) -> ParseResults:
"""Convert a local OWL/OBO file to an OBO Graph JSON object.
:param path: The path to a local OWL or OBO file
:param json_path: The optional path to store the intermediate
OBO Graph JSON file generated by ROBOT. If not given, the
OBO Graph JSON file will be put in a temporary directory
and deleted after the function finishes.
:param from_iri: Use this parameter to say what IRI the graph came from
:param check:
By default, the OBO writer strictly enforces
`document structure rules <http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4>`_.
If an ontology violates these, the convert to OBO operation will fail.
These checks can be ignored by setting this to false.
:returns: An object with the parsed OBO Graph JSON and text
output from the ROBOT conversion program
"""
return convert_to_obograph(
input_path=path, input_flag="-i", json_path=json_path, from_iri=from_iri, check=check
)
[docs]
def convert_to_obograph_remote(
iri: str,
*,
json_path: Union[None, str, Path] = None,
check: bool = True,
reason: bool = True,
) -> ParseResults:
"""Convert a remote OWL/OBO file to an OBO Graph JSON object.
:param iri: The IRI for a remote OWL or OBO file
:param json_path: The optional path to store the intermediate
OBO Graph JSON file generated by ROBOT. If not given, the
OBO Graph JSON file will be put in a temporary directory
and deleted after the function finishes.
:param check:
By default, the OBO writer strictly enforces
`document structure rules <http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4>`.
If an ontology violates these, the convert to OBO operation will fail.
These checks can be ignored by setting this to false.
:param reason:
Turn on ontology reasoning
:returns: An object with the parsed OBO Graph JSON and text
output from the ROBOT conversion program
"""
return convert_to_obograph(
input_path=iri,
input_flag="-I",
json_path=json_path,
input_is_iri=True,
check=check,
reason=reason,
)
[docs]
def convert_to_obograph(
input_path: Union[str, Path],
*,
input_flag: Optional[Literal["-i", "-I"]] = None,
json_path: Union[None, str, Path] = None,
input_is_iri: bool = False,
extra_args: Optional[List[str]] = None,
from_iri: Optional[str] = None,
merge: bool = True,
check: bool = True,
reason: bool = True,
) -> ParseResults:
"""Convert a local OWL file to a JSON file.
:param input_path: Either a local file path or IRI. If a local file path
is used, pass ``"-i"`` to ``flag``. If an IRI is used, pass ``"-I"``
to ``flag``.
:param input_flag: The flag to denote if the file is local or remote.
Tries to infer from input string if none is given
:param json_path: The optional path to store the intermediate
OBO Graph JSON file generated by ROBOT. If not given, the
OBO Graph JSON file will be put in a temporary directory
and deleted after the function finishes.
:param input_is_iri:
Should the ``input_path`` varible be considered as an IRI that
gets stored in the returned parse results?
:param extra_args:
Extra positional arguments to pass in the command line
:param from_iri: Use this parameter to say what IRI the graph came from
:param merge: Use ROBOT's merge command to squash all graphs together
:param check:
By default, the OBO writer strictly enforces
`document structure rules <http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4>`.
If an ontology violates these, the convert to OBO operation will fail.
These checks can be ignored by setting this to false.
:param reason:
Turn on ontology reasoning
:returns: An object with the parsed OBO Graph JSON and text
output from the ROBOT conversion program
:raises ValueError: if a graph is missing an ID
:raises TypeError: if ``input_as_iri`` is marked as true but a path
object is given for the ``input_path``
"""
if input_is_iri and not isinstance(input_path, str):
raise TypeError
if input_is_iri and from_iri is not None:
raise ValueError("can't specifiy from_iri when input is IRI")
with _path_context(json_path) as path:
ret = convert(
input_path=input_path,
input_flag=input_flag,
output_path=path,
fmt="json",
extra_args=extra_args,
merge=merge,
check=check,
reason=reason,
)
messages = ret.strip().splitlines()
graph_document_raw = json.loads(path.read_text())
graphs_raw = graph_document_raw["graphs"]
if len(graphs_raw) == 1 and "id" not in graphs_raw[0]:
if input_is_iri:
logger.warning(
f"{input_path} has a single graph, missing an ID. assigning with IRI"
)
graphs_raw[0]["id"] = input_path
elif from_iri is not None:
logger.warning(
f"{input_path} has a single graph, missing an ID. assigning with IRI: {from_iri}"
)
graphs_raw[0]["id"] = from_iri
else:
raise ValueError(f"{input_path} only graph is missing id")
else:
missing = [i for i, graph in enumerate(graphs_raw) if "id" not in graph]
if missing:
raise ValueError(f"{input_path} graphs missing IDs: {missing}")
correct_raw_json(graph_document_raw)
graph_document = GraphDocument.parse_obj(graph_document_raw)
return ParseResults(
graph_document=graph_document,
messages=messages,
iri=input_path if input_is_iri else None, # type:ignore
)
def correct_raw_json(graph_document_raw) -> None:
"""Correct issues in raw graph documents, in place."""
for graph in graph_document_raw["graphs"]:
_clean_raw_meta(graph)
for node in graph["nodes"]:
_clean_raw_meta(node)
graph["nodes"] = [node for node in graph["nodes"] if "type" in node]
return graph_document_raw
def _clean_raw_meta(element):
meta = element.get("meta")
if not meta:
return
basic_property_values = meta.get("basicPropertyValues")
if basic_property_values:
meta["basicPropertyValues"] = [
basic_property_value
for basic_property_value in basic_property_values
if basic_property_value.get("pred") and basic_property_value.get("val")
]
definition = meta.get("definition")
if definition is not None and not definition.get("val"):
del meta["definition"]
xrefs = meta.get("xrefs")
if xrefs:
meta["xrefs"] = [xref for xref in xrefs if xref.get("val")]
# What's the point of a synonym with an empty value? Nothing!
synonyms = meta.get("synonyms")
if synonyms:
meta["synonyms"] = [synonym for synonym in synonyms if synonym.get("val")]
#: Prefixes that denote remote resources
PROTOCOLS = {
"https://",
"http://",
"ftp://",
"ftps://",
}
def _is_remote(url: Union[str, Path]) -> bool:
return isinstance(url, str) and any(url.startswith(protocol) for protocol in PROTOCOLS)
@contextmanager
def _path_context(path: Union[None, str, Path], name: str = "output.json"):
if path is not None:
yield Path(path).resolve()
else:
with tempfile.TemporaryDirectory() as directory:
yield Path(directory).joinpath(name)
[docs]
def convert(
input_path: Union[str, Path],
output_path: Union[str, Path],
input_flag: Optional[Literal["-i", "-I"]] = None,
*,
merge: bool = True,
fmt: Optional[str] = None,
check: bool = True,
reason: bool = False,
extra_args: Optional[List[str]] = None,
) -> str:
"""Convert an OBO file to an OWL file with ROBOT.
:param input_path: Either a local file path or IRI. If a local file path
is used, pass ``"-i"`` to ``flag``. If an IRI is used, pass ``"-I"``
to ``flag``.
:param output_path: The local file path to save the converted ontology to.
Will infer format from the extension, otherwise, use the ``fmt`` param.
:param input_flag: The flag to denote if the file is local or remote.
Tries to infer from input string if none is given
:param merge: Use ROBOT's merge command to squash all graphs together
:param fmt: Explicitly set the format
:param check:
By default, the OBO writer strictly enforces
`document structure rules <http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4>`.
If an ontology violates these, the convert to OBO operation will fail.
These checks can be ignored by setting this to false.
:param reason:
Turn on ontology reasoning
:param extra_args:
Extra positional arguments to pass in the command line
:return: Output from standard out from running ROBOT
"""
if input_flag is None:
input_flag = "-I" if _is_remote(input_path) else "-i"
args = list(ROBOT_COMMAND)
if merge and not reason:
args.extend(["merge", input_flag, str(input_path), "convert"])
elif merge and reason:
args.extend(
[
"merge",
input_flag,
str(input_path),
"reason",
"convert",
]
)
elif not merge and reason:
args.extend(
[
"reason",
input_flag,
str(input_path),
"convert",
]
)
else:
args.extend(
[
"convert",
input_flag,
str(input_path),
]
)
args.extend(("-o", str(output_path)))
if extra_args:
args.extend(extra_args)
if not check:
args.append("--check=false")
if fmt:
args.extend(("--format", fmt))
logger.debug("Running shell command: %s", args)
ret = check_output( # noqa:S603
args,
cwd=os.path.dirname(__file__),
)
return ret.decode()
def write_getter_warnings(path: Union[str, Path]) -> None:
"""Write warned unparsable."""
path = Path(path).resolve()
path.write_text("\n".join(GETTER_MESSAGES))