import os
import pathlib
import re
from typing import Union
from lxml import etree
from lxml.builder import ElementMaker
# Prefix -> namespace URI for each XML namespace used by the helpers in this
# module. The URIs are concatenated directly with local names (see nssuffix),
# so their trailing '#' characters are significant.
namespaces = {
    'xsd': 'http://www.w3.org/2001/XMLSchema#',
    'owl': 'http://www.w3.org/2002/07/owl#',
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'bp': 'http://www.biopax.org/release/biopax-level3.owl#',
    'xml': 'http://www.w3.org/XML/1998/namespace',
}
# One lxml ElementMaker per namespace prefix, each bound to that prefix's
# namespace URI, e.g. makers['owl']('Ontology') builds an owl:Ontology element.
makers = {
    prefix: ElementMaker(namespace=uri)
    for prefix, uri in namespaces.items()
}
def wrap_xml_elements(elements, xml_base):
    """Return an rdf:RDF root element wrapping XML-serialized BioPAX objects.

    Parameters
    ----------
    elements :
        Iterable of XML elements; each is appended to the root, in iteration
        order, after the owl:Ontology header.
    xml_base :
        Value stored in the root element's xml:base attribute.

    Returns
    -------
    The rdf:RDF root element. Its first child is an owl:Ontology element
    (with an empty rdf:about) that holds a single owl:imports child whose
    rdf:resource is the BioPAX namespace.
    """
    rdf_maker = ElementMaker(namespace=namespaces['rdf'], nsmap=namespaces)
    owl_maker = makers['owl']

    # Attribute names are in Clark notation ('{uri}name'), which is not a
    # valid keyword identifier, hence the ** expansion of one-item dicts.
    root = rdf_maker('RDF', **{nselem('xml', 'base'): xml_base})
    ontology = owl_maker('Ontology', **{nselem('rdf', 'about'): ''})
    ontology.append(
        owl_maker('imports', **{nselem('rdf', 'resource'): namespaces['bp']})
    )
    root.append(ontology)

    root.extend(elements)
    return root
def xml_to_str(xml):
    """Return the OWL string for an XML element tree.

    The tree is serialized by lxml with pretty-printing, UTF-8 encoding and
    an XML declaration, decoded to ``str``, and then post-processed line by
    line with three regular-expression rewrites applied in order.

    Parameters
    ----------
    xml :
        The lxml element (tree) to serialize.

    Returns
    -------
    str
        The serialized and re-formatted document.
    """
    serialized = etree.tostring(
        xml, pretty_print=True, encoding='utf-8', xml_declaration=True
    ).decode('utf-8')

    # (pattern, replacement) pairs, applied to every line in this order:
    #   1. a line starting with ' <bp' gets a newline inserted before it
    #      and loses its leading space;
    #   2. a line starting with ' </bp' loses its leading space;
    #   3. a line starting with ' <' is replaced with the same text.
    # NOTE(review): as written these match exactly one leading space and the
    # third rewrite is a no-op; the indentation widths in these patterns may
    # have been altered at some point -- confirm against the intended output.
    rewrites = (
        (r'^ <bp', '\n<bp'),
        (r'^ </bp', '</bp'),
        (r'^ <', ' <'),
    )

    lines = serialized.split('\n')
    for pattern, replacement in rewrites:
        rewritten = '\n'.join(
            re.sub(pattern, replacement, line) for line in lines
        )
        # Re-split so that newlines inserted by one rewrite start new lines
        # for the next one.
        lines = rewritten.split('\n')
    return '\n'.join(lines)
def xml_to_file(xml, fname: Union[str, pathlib.Path, os.PathLike]):
    """Write an XML element tree to a given file.

    Parameters
    ----------
    xml :
        The XML element (tree) to serialize with xml_to_str.
    fname :
        Path of the file to write; it is created or truncated.
    """
    # xml_to_str emits an XML declaration with encoding='utf-8', so the file
    # must actually be written as UTF-8 rather than in the platform's default
    # text encoding, which may differ and corrupt or reject non-ASCII text.
    with open(fname, 'w', encoding='utf-8') as fh:
        fh.write(xml_to_str(xml))
def nselem(ns, elem):
    """Return a namespaced name in curly-bracket form: ``{uri}elem``.

    Parameters
    ----------
    ns :
        A prefix key of the module-level ``namespaces`` mapping.
    elem :
        The local name to place after the bracketed namespace URI.

    Raises
    ------
    KeyError
        If ``ns`` is not a key of ``namespaces``.
    """
    uri = namespaces[ns]
    return f'{{{uri}}}{elem}'
def nssuffix(ns, suffix):
    """Return the namespace URI for ``ns`` directly followed by ``suffix``.

    Parameters
    ----------
    ns :
        A prefix key of the module-level ``namespaces`` mapping.
    suffix :
        The text appended to the namespace URI, with no separator.

    Raises
    ------
    KeyError
        If ``ns`` is not a key of ``namespaces``.
    """
    uri = namespaces[ns]
    return f'{uri}{suffix}'
def get_datatype(attrib):
    """Return the rdf:datatype value from an attribute mapping.

    Parameters
    ----------
    attrib :
        A mapping of attribute names to values offering ``.get``.

    Returns
    -------
    The value stored under the namespaced rdf:datatype key, or None if the
    key is absent.
    """
    datatype_key = nselem('rdf', 'datatype')
    return attrib.get(datatype_key)
def get_resource(attrib):
    """Return the rdf:resource value from an attribute mapping.

    A single leading '#' is stripped from the value.

    Parameters
    ----------
    attrib :
        A mapping of attribute names to values offering ``.get``.

    Returns
    -------
    The resource string without a leading '#', or None if the rdf:resource
    attribute is missing or empty.
    """
    resource = attrib.get(nselem('rdf', 'resource'))
    if not resource:
        return None
    return resource[1:] if resource.startswith('#') else resource
def is_url(txt):
    """Return True if the given string looks like a URL.

    This is a plain prefix check: any string starting with 'http' counts.
    """
    url_prefix = 'http'
    return txt.startswith(url_prefix)
def is_datatype(attrib, prefix, datatype):
    """Return True if the given attribute mapping has the given RDF data type.

    The rdf:datatype value is accepted in either of two spellings: the full
    form (namespace URI followed by ``datatype``) or the prefixed form
    ``prefix:datatype``.

    Parameters
    ----------
    attrib :
        A mapping of attribute names to values offering ``.get``.
    prefix :
        A prefix key of the module-level ``namespaces`` mapping.
    datatype :
        The local name of the data type.
    """
    full_form = nssuffix(prefix, datatype)
    prefixed_form = f'{prefix}:{datatype}'
    accepted = {full_form, prefixed_form}
    return get_datatype(attrib) in accepted
def get_tag(element):
    """Return the tag of an element without its namespace part.

    Parameters
    ----------
    element :
        An object whose ``tag`` attribute is a string, typically in the
        ``{namespace-uri}local`` form.

    Returns
    -------
    str
        The text following the last '}' in the tag. If the tag has no
        ``{namespace}`` part it is returned unchanged.
    """
    tag = element.tag
    match = re.match(r'.*}(.+)', tag)
    # A tag without a namespace does not match; previously this surfaced as
    # an AttributeError on None. Such a tag already is the local name.
    if match is None:
        return tag
    return match.group(1)
def get_attr_tag(element):
    """Return the tag of an element as a snake_case attribute name.

    The namespace-free tag from get_tag is converted with camel_to_snake.
    """
    return camel_to_snake(get_tag(element))
def get_id_or_about(element):
    """Return the rdf:ID of an element, falling back to its rdf:about.

    Returns
    -------
    The rdf:ID attribute value if it is present and non-empty; otherwise the
    rdf:about attribute value, which is None when that attribute is absent.
    """
    attributes = element.attrib
    rdf_id = attributes.get(nselem('rdf', 'ID'))
    if rdf_id:
        return rdf_id
    return attributes.get(nselem('rdf', 'about'))
def get_ns(element):
    """Return the namespace of a given element.

    Parameters
    ----------
    element :
        An object whose ``tag`` attribute is a string, typically in the
        ``{namespace-uri}local`` form.

    Returns
    -------
    str or None
        The text between the leading '{' and the last '}' of the tag, or
        None if the tag does not start with a ``{namespace}`` part.
    """
    match = re.match(r'\{(.*)\}', element.tag)
    # Tags without a namespace do not match; report that as None instead of
    # failing with an AttributeError on the missing match object.
    return match.group(1) if match else None
def has_ns(element, ns):
    """Return True if the element's namespace is the one registered for ``ns``.

    Parameters
    ----------
    element :
        The element whose tag namespace is inspected via get_ns.
    ns :
        A prefix key of the module-level ``namespaces`` mapping.
    """
    element_uri = get_ns(element)
    expected_uri = namespaces[ns]
    return element_uri == expected_uri
def camel_to_snake(txt):
    """Return the snake_case form of a camelCase string.

    An underscore is inserted before every ASCII uppercase letter that is
    not the first character, and the result is lowercased, e.g.
    ``displayName`` -> ``display_name`` and ``DNA`` -> ``d_n_a``.
    """
    # Zero-width match: any position (except the very start) that is
    # immediately followed by an uppercase letter.
    word_boundary = re.compile(r'(?<!^)(?=[A-Z])')
    underscored = word_boundary.sub('_', txt)
    return underscored.lower()
def snake_to_camel(txt):
    """Return the camelCase form of a snake_case string.

    The text before the first underscore is kept as is; every following
    underscore-separated part is passed through ``str.capitalize`` (first
    character uppercased, the rest lowercased) and the parts are joined
    without separators, e.g. ``display_name`` -> ``displayName``.
    """
    head, *tail = txt.split('_')
    return head + ''.join(part.capitalize() for part in tail)