__all__ = ['model_from_owl_str', 'model_from_owl_file', 'model_to_owl_str',
'model_to_owl_file', 'model_from_owl_url', 'model_from_pc_query',
'model_from_reactome', 'model_from_ecocyc', 'model_from_metacyc',
'model_from_biocyc', 'model_from_humancyc', 'model_from_netpath',
'model_from_owl_gz', 'PYBIOPAX_TQDM_CONFIG'
]
import gzip
import os
import pathlib
import requests
from lxml import etree
from typing import Any, Mapping, Optional, Union
from .biopax.model import BioPaxModel, PYBIOPAX_TQDM_CONFIG
from .xml_util import xml_to_str, xml_to_file
from .pc_client import graph_query
[docs]def model_from_owl_str(owl_str: str) -> BioPaxModel:
"""Return a BioPAX Model from an OWL string.
Parameters
----------
owl_str :
A OWL string of BioPAX content.
Returns
-------
pybiopax.biopax.BioPaxModel
A BioPAX Model deserialized from the OWL string.
"""
return BioPaxModel.from_xml(etree.fromstring(owl_str.encode('utf-8')))
[docs]def model_from_owl_file(fname: Union[str, pathlib.Path, os.PathLike],
encoding: Optional[str] = None) \
-> BioPaxModel:
"""Return a BioPAX Model from an OWL string.
Parameters
----------
fname :
A path to an OWL file of BioPAX content.
encoding :
The encoding type to be passed to :func:`open`.
Returns
-------
:
A BioPAX Model deserialized from the OWL file.
"""
with open(fname, 'r', encoding=encoding) as fh:
owl_str = fh.read()
return model_from_owl_str(owl_str)
[docs]def model_from_owl_gz(
path: Union[str, pathlib.Path, os.PathLike],
encoding: Optional[str] = None,
) -> BioPaxModel:
"""Return a BioPAX Model from an OWL file (gzipped).
Parameters
----------
path :
A path to a gzipped OWL file of BioPAX content.
encoding :
The encoding to read the file with. Defaults to the
system default. Sometimes, windows users will need to
explicitly set this to utf-8.
Returns
-------
:
A BioPAX Model deserialized from the OWL file.
"""
with gzip.open(path, 'rt', encoding=encoding) as fh:
return BioPaxModel.from_xml(etree.parse(fh).getroot())
def model_from_owl_gz_str(owl_gz_str: bytes) -> BioPaxModel:
"""Return a BioPAX Model from an OWL string.
Parameters
----------
owl_gz_str :
A OWL string of BioPAX content.
Returns
-------
pybiopax.biopax.BioPaxModel
A BioPAX Model deserialized from the OWL string.
"""
return model_from_owl_str(gzip.decompress(owl_gz_str).decode('utf-8'))
[docs]def model_from_owl_url(url: str,
request_params: Optional[Mapping[str, Any]] = None) \
-> BioPaxModel:
"""Return a BioPAX Model from an URL pointing to an OWL file.
Parameters
----------
url :
A OWL URL with BioPAX content.
request_params :
Additional keyword arguments to pass to :func:`requests.get`
Returns
-------
:
A BioPAX Model deserialized from the OWL file.
"""
request_params = {} if not request_params else request_params
res = requests.get(url, **request_params)
res.raise_for_status()
if url.endswith('gz'):
return model_from_owl_gz_str(res.content)
else:
return model_from_owl_str(res.text)
[docs]def model_from_pc_query(kind, source, target=None, **query_params):
"""Return a BioPAX Model from a Pathway Commons query.
For more information on these queries, see
http://www.pathwaycommons.org/pc2/#graph
Parameters
----------
kind : str
The kind of graph query to perform. Currently 3 options are
implemented, 'neighborhood', 'pathsbetween' and 'pathsfromto'.
source : list[str]
A single gene name or a list of gene names which are the source set for
the graph query.
target : Optional[list[str]]
A single gene name or a list of gene names which are the target set for
the graph query. Only needed for 'pathsfromto' queries.
limit : Optional[int]
This limits the length of the longest path considered in
the graph query. Default: 1
organism : Optional[str]
The organism used for the query. Default: '9606' corresponding
to human.
datasource : Optional[list[str]]
A list of database sources that the query results should include.
Example: ['pid', 'panther']. By default, all databases are considered.
Returns
-------
pybiopax.biopax.BioPaxModel
A BioPAX Model obtained from the results of the Pathway Commons query.
"""
owl_str = graph_query(kind, source, target=target, **query_params)
return model_from_owl_str(owl_str)
[docs]def model_from_netpath(identifier: str) -> BioPaxModel:
"""Return a BioPAX model from a `NetPath <http://netpath.org>`_ entry.
Parameters
----------
identifier :
The NetPath identifier for a pathway (e.g., ``22`` for the `leptin
signaling pathway <http://netpath.org/pathways?path_id=NetPath_22>`_
Returns
-------
:
A BioPAX model obtained from the NetPath resource.
"""
url = f"http://netpath.org/data/biopax/NetPath_{identifier}.owl"
return model_from_owl_url(url)
[docs]def model_from_reactome(identifier: str) -> BioPaxModel:
"""Return a BioPAX model from a Reactome entry (pathway, event, etc.).
Parameters
----------
identifier :
The Reactome identifier for a pathway (e.g., ``177929`` for `Signaling
by EGFR <https://reactome.org/content/detail/R-HSA-177929>`_)
or reaction (e.g., ``177946`` for `Pro-EGF is cleaved to form mature
EGF <https://reactome.org/content/detail/R-HSA-177946>`_). For human
pathways, the identifier for the BioPAX download is the same as the part
that comes after ``R-HSA-``. For non-human pathways, this is not so
clear.
Returns
-------
:
A BioPAX model obtained from the Reactome resource.
"""
if identifier.startswith("R-HSA-"):
# If you give something like R-XXX-YYYYY, just get the YYYYY part back
# for download.
identifier = identifier[len("R-HSA-"):]
url = f"https://reactome.org/ReactomeRESTfulAPI/RESTfulWS/biopaxExporter/" \
f"Level3/{identifier}"
return model_from_owl_url(url)
[docs]def model_from_humancyc(identifier: str) -> BioPaxModel:
"""Return a BioPAX model from a HumanCyc entry.
Parameters
----------
identifier :
The HumanCyc identifier for a pathway (e.g., ``PWY66-398`` for
`TCA cycle
<https://humancyc.org/HUMAN/NEW-IMAGE?type=PATHWAY&object=PWY66-398>`_)
Returns
-------
:
A BioPAX model obtained from the HumanCyc pathway.
"""
return _model_from_xcyc("https://humancyc.org/HUMAN/pathway-biopax",
identifier)
[docs]def model_from_biocyc(identifier: str) -> BioPaxModel:
"""Return a BioPAX model from a `BioCyc <https://biocyc.org>`_ entry.
BioCyc contains pathways for model eukaryotes and microbes.
Parameters
----------
identifier :
The BioCyc identifier for a pathway (e.g., ``P105-PWY`` for
`TCA cycle IV
(2-oxoglutarate decarboxylase) <https://biocyc.org/META/NEW-IMAGE?
type=PATHWAY&object=P105-PWY>`_)
Returns
-------
:
A BioPAX model obtained from the BioCyc pathway.
"""
return _model_from_xcyc("https://biocyc.org/META/pathway-biopax",
identifier)
[docs]def model_from_ecocyc(identifier: str) -> BioPaxModel:
"""Return a BioPAX model from a `EcoCyc <https://ecocyc.org/>`_ entry.
EcoCyc contains pathways for Escherichia coli K-12 MG1655.
Parameters
----------
identifier :
The EcoCyc identifier for a pathway (e.g., ``TCA`` for
`TCA cycle I (prokaryotic) <https://ecocyc.org/ECOLI/NEW-IMAGE?type=PATHWAY&object=TCA>`_)
Returns
-------
:
A BioPAX model obtained from the EcoCyc pathway.
"""
return _model_from_xcyc("https://ecocyc.org/ECOLI/pathway-biopax",
identifier)
def _model_from_xcyc(url: str, identifier: str) -> BioPaxModel:
"""Return a BioPAX model from one of the Cyc databases entry.
Parameters
----------
url :
The base url for the XXXCyc BioPAX download endpoint. All of them have
the form ``https://....../META/pathway-biopax``.
identifier :
The site-specific identifier for a pathway
Returns
-------
:
A BioPAX model obtained from the pathway.
"""
# Extend URL with arguments
url = url + f'?type=3&object={identifier}'
# Not sure if the SSL issue is temporary. Remove verify=False later
return model_from_owl_url(url, request_params={'verify': False})
[docs]def model_to_owl_str(model: BioPaxModel) -> str:
"""Return an OWL string serialized from a BioPaxModel object.
Parameters
----------
model :
The BioPaxModel to serialize into an OWL string.
Returns
-------
:
The OWL string for the model.
"""
return xml_to_str(model.to_xml())
[docs]def model_to_owl_file(model: BioPaxModel,
fname: Union[str, pathlib.Path, os.PathLike]):
"""Write an OWL string serialized from a BioPaxModel object into a file.
Parameters
----------
model :
The BioPaxModel to serialize into an OWL file.
fname :
The path to the target OWL file.
"""
xml_to_file(model.to_xml(), fname)