Source code for pybiopax.xml_util

import os
import pathlib
import re
from typing import Union

from lxml import etree
from lxml.builder import ElementMaker


# Short prefix -> namespace URI for the XML namespaces this module works with.
# The prefixes are the keys accepted by nselem(), nssuffix() and has_ns().
namespaces = dict(
    xsd='http://www.w3.org/2001/XMLSchema#',
    owl='http://www.w3.org/2002/07/owl#',
    rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    bp='http://www.biopax.org/release/biopax-level3.owl#',
    xml='http://www.w3.org/XML/1998/namespace',
)


# One ElementMaker per namespace, keyed by the namespace's short prefix, so
# that makers['owl']('Ontology') builds an element in the OWL namespace.
makers = {
    prefix: ElementMaker(namespace=uri)
    for prefix, uri in namespaces.items()
}


def wrap_xml_elements(elements, xml_base):
    """Return an RDF root element wrapping XML-serialized BioPAX objects.

    The root is an ``rdf:RDF`` element carrying ``xml:base``. Its first
    child is an ``owl:Ontology`` element (with an empty ``rdf:about``) that
    holds a single ``owl:imports`` pointing at the BioPAX namespace. The
    given elements are appended after it, in iteration order.

    Parameters
    ----------
    elements :
        Iterable of XML elements to place inside the RDF root.
    xml_base :
        Value of the ``xml:base`` attribute set on the RDF root.

    Returns
    -------
    The populated ``rdf:RDF`` root element.
    """
    # The root declares every known prefix so that children serialize with
    # short prefixes.
    rdf_maker = ElementMaker(namespace=namespaces['rdf'], nsmap=namespaces)
    root = rdf_maker('RDF', **{nselem('xml', 'base'): xml_base})

    # Ontology header: <owl:Ontology rdf:about=""> with one <owl:imports>.
    owl = makers['owl']
    ontology = owl('Ontology', **{nselem('rdf', 'about'): ''})
    ontology.append(
        owl('imports', **{nselem('rdf', 'resource'): namespaces['bp']})
    )
    root.append(ontology)

    for child in elements:
        root.append(child)
    return root
def xml_to_str(xml):
    """Return the OWL string for an XML element tree.

    The tree is serialized pretty-printed as UTF-8 with an XML declaration,
    decoded to ``str``, and then post-processed line by line with a fixed
    sequence of regular-expression rewrites.
    """
    serialized = etree.tostring(xml, pretty_print=True, encoding='utf-8',
                                xml_declaration=True)
    text = serialized.decode('utf-8')

    # (pattern, replacement) pairs, applied in order to every line. Each pass
    # re-splits the text, so a newline inserted by one pass starts a new line
    # for the next.
    # NOTE(review): as written here the last pair replaces ' <' with ' <'
    # and therefore changes nothing. The amount of leading whitespace in
    # these three patterns should be confirmed against the upstream source.
    rewrites = (
        (r'^ <bp', '\n<bp'),   # blank line before an opening <bp...> tag
        (r'^ </bp', '</bp'),   # drop the leading space of a closing </bp...>
        (r'^ <', ' <'),
    )
    for pattern, replacement in rewrites:
        text = '\n'.join(
            re.sub(pattern, replacement, line) for line in text.split('\n')
        )
    return text
def xml_to_file(xml, fname: Union[str, pathlib.Path, os.PathLike]):
    """Write an XML element tree to a given file.

    Parameters
    ----------
    xml :
        The XML element tree to serialize with :func:`xml_to_str`.
    fname :
        Path of the file to create or overwrite.
    """
    # xml_to_str() produces text whose XML declaration names UTF-8, so the
    # file must be encoded as UTF-8 regardless of the platform's default
    # text encoding. Otherwise the declaration and the bytes on disk can
    # disagree, or writing non-ASCII text can fail.
    with open(fname, 'w', encoding='utf-8') as fh:
        fh.write(xml_to_str(xml))
def nselem(ns, elem):
    """Return ``elem`` qualified with a namespace in curly-bracket form.

    ``ns`` is a key of ``namespaces``; the result is ``'{<uri>}<elem>'``.
    An unknown ``ns`` raises ``KeyError``.
    """
    uri = namespaces[ns]
    return f'{{{uri}}}{elem}'
def nssuffix(ns, suffix):
    """Return the namespace URI for ``ns`` with ``suffix`` appended.

    ``ns`` is a key of ``namespaces``; an unknown key raises ``KeyError``.
    """
    uri = namespaces[ns]
    return f'{uri}{suffix}'
def get_datatype(attrib):
    """Return the ``rdf:datatype`` value from an attribute mapping.

    Returns ``None`` when the mapping has no such attribute.
    """
    datatype_key = nselem('rdf', 'datatype')
    return attrib.get(datatype_key)
def get_resource(attrib):
    """Return the ``rdf:resource`` value from an attribute mapping.

    A single leading ``#`` is stripped from the value. Returns ``None`` when
    the attribute is missing or empty.
    """
    resource = attrib.get(nselem('rdf', 'resource'))
    if not resource:
        return None
    return resource[1:] if resource.startswith('#') else resource
def is_url(txt):
    """Return True if the given string looks like a URL.

    The check is purely a prefix test: any string starting with ``http``
    qualifies.
    """
    url_prefix = 'http'
    return txt.startswith(url_prefix)
def is_datatype(attrib, prefix, datatype):
    """Return True if the given attribute is of a given type.

    The ``rdf:datatype`` of ``attrib`` matches when it equals either the
    full form (namespace URI of ``prefix`` followed by ``datatype``) or the
    prefixed form ``'<prefix>:<datatype>'``.
    """
    full_form = nssuffix(prefix, datatype)
    prefixed_form = f'{prefix}:{datatype}'
    declared = get_datatype(attrib)
    return declared == full_form or declared == prefixed_form
def get_tag(element):
    """Return the tag of an element without its namespace.

    Parameters
    ----------
    element :
        Object with a string ``tag`` attribute, normally of the form
        ``'{<namespace-uri>}<name>'``.

    Returns
    -------
    str
        The part of the tag after the closing curly bracket. A tag with no
        namespace part is returned unchanged.
    """
    tag = element.tag
    match = re.match(r'.*}(.+)', tag)
    if match is None:
        # No '{namespace}' prefix: the tag is already the bare name.
        return tag
    return match.group(1)
def get_attr_tag(element):
    """Return an element's tag converted to a snake-case attribute name."""
    return camel_to_snake(get_tag(element))
def get_id_or_about(element):
    """Return the ``rdf:ID`` of an element, else its ``rdf:about``.

    ``rdf:about`` is consulted only when ``rdf:ID`` is missing or empty.
    The result is ``None`` when ``rdf:about`` is absent as well.
    """
    rdf_id = element.attrib.get(nselem('rdf', 'ID'))
    if rdf_id:
        return rdf_id
    return element.attrib.get(nselem('rdf', 'about'))
def get_ns(element):
    """Return the namespace of a given element.

    Parameters
    ----------
    element :
        Object with a string ``tag`` attribute, normally of the form
        ``'{<namespace-uri>}<name>'``.

    Returns
    -------
    str or None
        The text between the curly brackets at the start of the tag, or
        ``None`` when the tag does not start with a ``{...}`` part.
    """
    match = re.match(r'\{(.*)\}', element.tag)
    if match is None:
        # Un-namespaced tag: report "no namespace" instead of failing on
        # the missing match object.
        return None
    return match.group(1)
def has_ns(element, ns):
    """Return True if the element's namespace is the one registered as ``ns``.

    ``ns`` is a key of ``namespaces``; an unknown key raises ``KeyError``.
    """
    element_ns = get_ns(element)
    return element_ns == namespaces[ns]
def camel_to_snake(txt):
    """Return snake case from camel case.

    An underscore is inserted before every ASCII capital letter except one
    at the very start of the string, and the result is lower-cased. Each
    capital in a run gets its own underscore (``'kEQ'`` -> ``'k_e_q'``).
    """
    # Zero-width match before each A-Z that is not at the start of the text.
    capital_boundary = r'(?<!^)(?=[A-Z])'
    underscored = re.sub(capital_boundary, '_', txt)
    return underscored.lower()
def snake_to_camel(txt):
    """Return camel case from snake case.

    The text is split on underscores. The first piece is kept as is and
    every later piece is capitalized (first character upper-cased, the rest
    lower-cased) before the pieces are joined back together.
    """
    head, *tail = txt.split('_')
    return head + ''.join(map(str.capitalize, tail))