Source code for pybiopax.api

__all__ = ['model_from_owl_str', 'model_from_owl_file', 'model_to_owl_str',
           'model_to_owl_file', 'model_from_owl_url', 'model_from_pc_query',
           'model_from_reactome', 'model_from_ecocyc', 'model_from_metacyc',
           'model_from_biocyc', 'model_from_humancyc', 'model_from_netpath',
           'model_from_owl_gz', 'PYBIOPAX_TQDM_CONFIG'
           ]

import gzip
import os
import pathlib

import requests
from lxml import etree
from typing import Any, Mapping, Optional, Union
from .biopax.model import BioPaxModel, PYBIOPAX_TQDM_CONFIG
from .xml_util import xml_to_str, xml_to_file
from .pc_client import graph_query


[docs]def model_from_owl_str(owl_str: str) -> BioPaxModel: """Return a BioPAX Model from an OWL string. Parameters ---------- owl_str : A OWL string of BioPAX content. Returns ------- pybiopax.biopax.BioPaxModel A BioPAX Model deserialized from the OWL string. """ return BioPaxModel.from_xml(etree.fromstring(owl_str.encode('utf-8')))
[docs]def model_from_owl_file(fname: Union[str, pathlib.Path, os.PathLike], encoding: Optional[str] = None) \ -> BioPaxModel: """Return a BioPAX Model from an OWL string. Parameters ---------- fname : A path to an OWL file of BioPAX content. encoding : The encoding type to be passed to :func:`open`. Returns ------- : A BioPAX Model deserialized from the OWL file. """ with open(fname, 'r', encoding=encoding) as fh: owl_str = fh.read() return model_from_owl_str(owl_str)
[docs]def model_from_owl_gz( path: Union[str, pathlib.Path, os.PathLike], encoding: Optional[str] = None, ) -> BioPaxModel: """Return a BioPAX Model from an OWL file (gzipped). Parameters ---------- path : A path to a gzipped OWL file of BioPAX content. encoding : The encoding to read the file with. Defaults to the system default. Sometimes, windows users will need to explicitly set this to utf-8. Returns ------- : A BioPAX Model deserialized from the OWL file. """ with gzip.open(path, 'rt', encoding=encoding) as fh: return BioPaxModel.from_xml(etree.parse(fh).getroot())
def model_from_owl_gz_str(owl_gz_str: bytes) -> BioPaxModel: """Return a BioPAX Model from an OWL string. Parameters ---------- owl_gz_str : A OWL string of BioPAX content. Returns ------- pybiopax.biopax.BioPaxModel A BioPAX Model deserialized from the OWL string. """ return model_from_owl_str(gzip.decompress(owl_gz_str).decode('utf-8'))
[docs]def model_from_owl_url(url: str, request_params: Optional[Mapping[str, Any]] = None) \ -> BioPaxModel: """Return a BioPAX Model from an URL pointing to an OWL file. Parameters ---------- url : A OWL URL with BioPAX content. request_params : Additional keyword arguments to pass to :func:`requests.get` Returns ------- : A BioPAX Model deserialized from the OWL file. """ request_params = {} if not request_params else request_params res = requests.get(url, **request_params) res.raise_for_status() if url.endswith('gz'): return model_from_owl_gz_str(res.content) else: return model_from_owl_str(res.text)
[docs]def model_from_pc_query(kind, source, target=None, **query_params): """Return a BioPAX Model from a Pathway Commons query. For more information on these queries, see http://www.pathwaycommons.org/pc2/#graph Parameters ---------- kind : str The kind of graph query to perform. Currently 3 options are implemented, 'neighborhood', 'pathsbetween' and 'pathsfromto'. source : list[str] A single gene name or a list of gene names which are the source set for the graph query. target : Optional[list[str]] A single gene name or a list of gene names which are the target set for the graph query. Only needed for 'pathsfromto' queries. limit : Optional[int] This limits the length of the longest path considered in the graph query. Default: 1 organism : Optional[str] The organism used for the query. Default: '9606' corresponding to human. datasource : Optional[list[str]] A list of database sources that the query results should include. Example: ['pid', 'panther']. By default, all databases are considered. Returns ------- pybiopax.biopax.BioPaxModel A BioPAX Model obtained from the results of the Pathway Commons query. """ owl_str = graph_query(kind, source, target=target, **query_params) return model_from_owl_str(owl_str)
[docs]def model_from_netpath(identifier: str) -> BioPaxModel: """Return a BioPAX model from a `NetPath <http://netpath.org>`_ entry. Parameters ---------- identifier : The NetPath identifier for a pathway (e.g., ``22`` for the `leptin signaling pathway <http://netpath.org/pathways?path_id=NetPath_22>`_ Returns ------- : A BioPAX model obtained from the NetPath resource. """ url = f"http://netpath.org/data/biopax/NetPath_{identifier}.owl" return model_from_owl_url(url)
[docs]def model_from_reactome(identifier: str) -> BioPaxModel: """Return a BioPAX model from a Reactome entry (pathway, event, etc.). Parameters ---------- identifier : The Reactome identifier for a pathway (e.g., ``177929`` for `Signaling by EGFR <https://reactome.org/content/detail/R-HSA-177929>`_) or reaction (e.g., ``177946`` for `Pro-EGF is cleaved to form mature EGF <https://reactome.org/content/detail/R-HSA-177946>`_). For human pathways, the identifier for the BioPAX download is the same as the part that comes after ``R-HSA-``. For non-human pathways, this is not so clear. Returns ------- : A BioPAX model obtained from the Reactome resource. """ if identifier.startswith("R-HSA-"): # If you give something like R-XXX-YYYYY, just get the YYYYY part back # for download. identifier = identifier[len("R-HSA-"):] url = f"https://reactome.org/ReactomeRESTfulAPI/RESTfulWS/biopaxExporter/" \ f"Level3/{identifier}" return model_from_owl_url(url)
[docs]def model_from_humancyc(identifier: str) -> BioPaxModel: """Return a BioPAX model from a HumanCyc entry. Parameters ---------- identifier : The HumanCyc identifier for a pathway (e.g., ``PWY66-398`` for `TCA cycle <https://humancyc.org/HUMAN/NEW-IMAGE?type=PATHWAY&object=PWY66-398>`_) Returns ------- : A BioPAX model obtained from the HumanCyc pathway. """ return _model_from_xcyc("https://humancyc.org/HUMAN/pathway-biopax", identifier)
[docs]def model_from_biocyc(identifier: str) -> BioPaxModel: """Return a BioPAX model from a `BioCyc <https://biocyc.org>`_ entry. BioCyc contains pathways for model eukaryotes and microbes. Parameters ---------- identifier : The BioCyc identifier for a pathway (e.g., ``P105-PWY`` for `TCA cycle IV (2-oxoglutarate decarboxylase) <https://biocyc.org/META/NEW-IMAGE? type=PATHWAY&object=P105-PWY>`_) Returns ------- : A BioPAX model obtained from the BioCyc pathway. """ return _model_from_xcyc("https://biocyc.org/META/pathway-biopax", identifier)
[docs]def model_from_metacyc(identifier: str) -> BioPaxModel: """Return a BioPAX model from a `MetaCyc <https://metacyc.org/>`_ entry. MetaCyc contains pathways for all organisms Parameters ---------- identifier : The MetaCyc identifier for a pathway (e.g., ``TCA`` for `TCA cycle I (prokaryotic) <https://metacyc.org/META/NEW-IMAGE?type=PATHWAY&object=TCA>`_) Returns ------- : A BioPAX model obtained from the MetaCyc pathway. """ return _model_from_xcyc("https://metacyc.org/META/pathway-biopax", identifier)
[docs]def model_from_ecocyc(identifier: str) -> BioPaxModel: """Return a BioPAX model from a `EcoCyc <https://ecocyc.org/>`_ entry. EcoCyc contains pathways for Escherichia coli K-12 MG1655. Parameters ---------- identifier : The EcoCyc identifier for a pathway (e.g., ``TCA`` for `TCA cycle I (prokaryotic) <https://ecocyc.org/ECOLI/NEW-IMAGE?type=PATHWAY&object=TCA>`_) Returns ------- : A BioPAX model obtained from the EcoCyc pathway. """ return _model_from_xcyc("https://ecocyc.org/ECOLI/pathway-biopax", identifier)
def _model_from_xcyc(url: str, identifier: str) -> BioPaxModel: """Return a BioPAX model from one of the Cyc databases entry. Parameters ---------- url : The base url for the XXXCyc BioPAX download endpoint. All of them have the form ``https://....../META/pathway-biopax``. identifier : The site-specific identifier for a pathway Returns ------- : A BioPAX model obtained from the pathway. """ # Extend URL with arguments url = url + f'?type=3&object={identifier}' # Not sure if the SSL issue is temporary. Remove verify=False later return model_from_owl_url(url, request_params={'verify': False})
[docs]def model_to_owl_str(model: BioPaxModel) -> str: """Return an OWL string serialized from a BioPaxModel object. Parameters ---------- model : The BioPaxModel to serialize into an OWL string. Returns ------- : The OWL string for the model. """ return xml_to_str(model.to_xml())
[docs]def model_to_owl_file(model: BioPaxModel, fname: Union[str, pathlib.Path, os.PathLike]): """Write an OWL string serialized from a BioPaxModel object into a file. Parameters ---------- model : The BioPaxModel to serialize into an OWL file. fname : The path to the target OWL file. """ xml_to_file(model.to_xml(), fname)