1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
|
# Copyright 2013 by Leighton Pritchard. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""This module provides classes and functions to parse a KGML pathway map.
The KGML pathway map is parsed into the object structure defined in
KGML_pathway.py in this package.
Classes:
- KGMLParser Parses KGML file
Functions:
- read Returns a single Pathway object, using KGMLParser
internally
"""
from __future__ import print_function
try:
import xml.etree.cElementTree as ElementTree
except ImportError:
import xml.etree.ElementTree as ElementTree
from Bio._py3k import StringIO
from Bio.KEGG.KGML.KGML_pathway import *
def read(handle, debug=0):
    """Parse exactly one KEGG Pathway from the given handle.

    Arguments:
     - handle - passed unchanged to parse()
     - debug - passed unchanged to parse()

    Returns the single Pathway object produced by parse().

    Raises ValueError if parse() yields no pathway at all, or if it
    yields more than one.
    """
    pathways = parse(handle, debug)
    # Pull at most two items: the pathway we want, then a probe for extras.
    pathway = next(pathways, None)
    if pathway is None:
        raise ValueError("No pathways found in handle")
    if next(pathways, None) is not None:
        raise ValueError("More than one pathway found in handle")
    return pathway
def parse(handle, debug=0):
    """Return an iterator over the Pathway elements of a KGML document.

    Arguments:
     - handle - file handle to a KGML file for parsing, or a string
       holding the KGML XML itself
     - debug - integer for amount of debug information to print
       (accepted for interface compatibility; not used by this function)

    This is a generator: one Pathway object is yielded for every
    <pathway> element, once that element's closing tag has been read.

    Raises TypeError if handle is neither a string nor an object with a
    read attribute. Because this is a generator, the error surfaces on
    the first iteration rather than at call time.
    """
    # Check handle: anything without a read attribute must be an XML string
    if not hasattr(handle, 'read'):
        if isinstance(handle, str):
            handle = StringIO(handle)
        else:
            # TypeError is a subclass of Exception, so callers that caught
            # the generic Exception raised here previously still work.
            raise TypeError("An XML-containing handle or an XML string "
                            "must be provided")
    # Parse XML and return each Pathway. Only 'end' events are requested,
    # since a <pathway> can only be converted once it is complete.
    for _event, elem in ElementTree.iterparse(handle, events=('end',)):
        if elem.tag == "pathway":
            yield KGMLParser(elem).parse()
            # The Pathway has been built; drop the element's contents
            elem.clear()
class KGMLParser(object):
    """Parses a KGML XML Pathway entry into a Pathway object.

    The parser is constructed with the XML element for one <pathway>;
    parse() converts that element and its <entry>, <reaction> and
    <relation> children into a Pathway object.
    """

    def __init__(self, elem):
        """Store the <pathway> element that parse() will convert."""
        self.entry = elem

    def parse(self):
        """Parse the stored element and return the resulting Pathway.

        The Pathway is also kept on the instance as self.pathway. Child
        elements with a tag other than entry, reaction or relation are
        skipped, and a BiopythonParserWarning is issued for each.
        """
        def _copy_attributes(element, target):
            """Set every XML attribute of element as an attribute of target."""
            for k, v in element.attrib.items():
                setattr(target, k, v)

        def _parse_entry(element):
            """Build an Entry, with its graphics/components, and add it."""
            new_entry = Entry()
            _copy_attributes(element, new_entry)
            # Iterate the element directly: Element.getchildren() was
            # removed in Python 3.9.
            for subelement in element:
                if subelement.tag == 'graphics':
                    _parse_graphics(subelement, new_entry)
                elif subelement.tag == 'component':
                    _parse_component(subelement, new_entry)
            self.pathway.add_entry(new_entry)

        def _parse_graphics(element, entry):
            """Build a Graphics object for entry and attach it."""
            new_graphics = Graphics(entry)
            _copy_attributes(element, new_graphics)
            entry.add_graphics(new_graphics)

        def _parse_component(element, entry):
            """Build a Component object for entry and attach it."""
            new_component = Component(entry)
            _copy_attributes(element, new_component)
            entry.add_component(new_component)

        def _parse_reaction(element):
            """Build a Reaction with its substrate/product ids and add it."""
            new_reaction = Reaction()
            _copy_attributes(element, new_reaction)
            for subelement in element:
                # Substrates and products are recorded by integer id
                if subelement.tag == 'substrate':
                    new_reaction.add_substrate(int(subelement.attrib['id']))
                elif subelement.tag == 'product':
                    new_reaction.add_product(int(subelement.attrib['id']))
            self.pathway.add_reaction(new_reaction)

        def _parse_relation(element):
            """Build a Relation with its subtypes and add it."""
            new_relation = Relation()
            new_relation.entry1 = int(element.attrib['entry1'])
            new_relation.entry2 = int(element.attrib['entry2'])
            new_relation.type = element.attrib['type']
            for subtype in element:
                name, value = subtype.attrib['name'], subtype.attrib['value']
                # Only these two subtype names have their value stored as
                # an integer; every other value is kept as a string.
                if name in ('compound', 'hidden compound'):
                    new_relation.subtypes.append((name, int(value)))
                else:
                    new_relation.subtypes.append((name, value))
            self.pathway.add_relation(new_relation)

        # ==========
        # Initialise Pathway
        self.pathway = Pathway()
        # Get information about the pathway itself
        _copy_attributes(self.entry, self.pathway)
        for element in self.entry:
            if element.tag == 'entry':
                _parse_entry(element)
            elif element.tag == 'reaction':
                _parse_reaction(element)
            elif element.tag == 'relation':
                _parse_relation(element)
            # Parsing of some elements not implemented - no examples yet
            else:
                # This should warn us of any unimplemented tags
                import warnings
                from Bio import BiopythonParserWarning
                warnings.warn("Warning: tag %s not implemented in parser" %
                              element.tag, BiopythonParserWarning)
        return self.pathway
if __name__ == '__main__':
    # Ad-hoc checks against KGML files expected relative to the working
    # directory. Each file is opened in a with-block so it is closed once
    # parsed, and in plain 'r' mode: the 'U' flag previously used here was
    # removed in Python 3.11 and makes open() raise ValueError.

    # Check large metabolism
    with open('ko01100.xml', 'r') as handle:
        pathway = read(handle)
    print(pathway)
    for k, v in list(pathway.entries.items())[:20]:
        print(v)
    for r in list(pathway.reactions)[:20]:
        print(r)
    print(len(pathway.maps))

    # Check relations
    with open('ko_metabolic/ko00010.xml', 'r') as handle:
        pathway = read(handle)
    print(pathway)
    for k, v in list(pathway.entries.items())[:20]:
        print(v)
    for r in list(pathway.reactions[:20]):
        print(r)
    for r in list(pathway.relations[:20]):
        print(r)
    print(len(pathway.maps))

    # Check components
    with open('ko_metabolic/ko00253.xml', 'r') as handle:
        pathway = read(handle)
    print(pathway)
    for k, v in pathway.entries.items():
        print(v)
    print(len(pathway.maps))

    # Test XML representation
    print(pathway.get_KGML())

    # Test bounds of pathway
    print(pathway.bounds)
|