1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
from __future__ import absolute_import
# Names exported by ``from <this module> import *``.
__all__ = ["tripleFromMetadataXML",
"decodeTriple",
"ChemistryLookupError" ]
import xml.etree.ElementTree as ET, os.path
from pkg_resources import Requirement, resource_filename
from collections import OrderedDict
class ChemistryLookupError(Exception):
    """Raised when chemistry information cannot be loaded or extracted."""
def _loadBarcodeMappingsFromFile(mapFile):
    """
    Parse a chemistry mapping XML file into an ordered lookup table.

    Every ``Mapping`` element directly under the document root is
    expected to carry ``BindingKit``, ``SequencingKit``,
    ``SoftwareVersion`` and ``SequencingChemistry`` children.

    :param mapFile: source handed directly to ``ET.parse``.
    :returns: an ``OrderedDict`` mapping
        ``(bindingKit, sequencingKit, softwareVersion)`` tuples to the
        sequencing chemistry text, in document order.  If the same
        triple appears more than once, the last occurrence wins.
    :raises ChemistryLookupError: if the source cannot be read or
        parsed, or a ``Mapping`` lacks one of the required children.
    """
    try:
        root = ET.parse(mapFile).getroot()
        mappings = OrderedDict()
        for mapElement in root.findall("Mapping"):
            # find() returns None for a missing child, so the .text
            # access raises AttributeError, which is converted below.
            bindingKit = mapElement.find("BindingKit").text
            sequencingKit = mapElement.find("SequencingKit").text
            softwareVersion = mapElement.find("SoftwareVersion").text
            sequencingChemistry = mapElement.find("SequencingChemistry").text
            mappings[(bindingKit, sequencingKit, softwareVersion)] = sequencingChemistry
        return mappings
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must
        # propagate instead of being reported as a lookup error.
        raise ChemistryLookupError("Error loading chemistry mapping xml")
def _loadBarcodeMappings():
    """
    Build the chemistry lookup table.

    The table starts from the ``mapping.xml`` resource located through
    ``pkg_resources`` for the ``pbcore`` requirement.  When the
    ``SMRT_CHEMISTRY_BUNDLE_DIR`` environment variable is set to a
    non-empty value, the entries of ``chemistry.xml`` in that directory
    are merged on top, replacing any triples already present.

    :returns: the mapping produced by ``_loadBarcodeMappingsFromFile``,
        optionally updated with the bundle entries.
    """
    builtinXml = resource_filename(Requirement.parse('pbcore'),
                                   'pbcore/chemistry/resources/mapping.xml')
    table = _loadBarcodeMappingsFromFile(builtinXml)

    bundleDir = os.getenv("SMRT_CHEMISTRY_BUNDLE_DIR")
    if not bundleDir:
        return table

    import logging
    logging.info("Loading updated chemistry mapping XML from {}".format(bundleDir))
    overrides = _loadBarcodeMappingsFromFile(os.path.join(bundleDir, 'chemistry.xml'))
    table.update(overrides)
    return table
# Chemistry lookup table, built once when this module is imported.
_BARCODE_MAPPINGS = _loadBarcodeMappings()
def tripleFromMetadataXML(metadataXmlPath):
    """
    Scrape the chemistry triple from a metadata.xml file.

    :param metadataXmlPath: source handed directly to ``ET.parse``.
    :returns: a ``(bindingKit, sequencingKit, instrumentControlVersion)``
        tuple of strings, where the instrument control version is
        truncated to its first two dot-delimited components.
    :raises ChemistryLookupError: if the file or the relevant contents
        are not found.
    """
    # Only the "pb" prefix is used by the paths below.  A ``None`` key
    # must not be mixed in: on Python 3, ElementPath sorts the namespace
    # items and comparing ``None`` with ``str`` raises TypeError.
    nsd = {"pb": "http://pacificbiosciences.com/PAP/Metadata.xsd"}
    try:
        root = ET.parse(metadataXmlPath).getroot()
        bindingKit = root.find("pb:BindingKit/pb:PartNumber", namespaces=nsd).text
        sequencingKit = root.find("pb:SequencingKit/pb:PartNumber", namespaces=nsd).text
        # The instrument version is truncated to the first 2 dot delimited components
        instrumentControlVersion = root.find("pb:InstCtrlVer", namespaces=nsd).text
        verComponents = instrumentControlVersion.split(".")[0:2]
        instrumentControlVersion = ".".join(verComponents)
        return (bindingKit, sequencingKit, instrumentControlVersion)
    except Exception:
        # Any parse failure or missing element (find() returning None)
        # is reported uniformly as a lookup error.
        raise ChemistryLookupError("Could not find, or extract chemistry information from, %s" % (metadataXmlPath,))
def decodeTriple(bindingKit, sequencingKit, softwareVersion):
    """
    Return the name of the chemistry configuration given the
    configuration triple that was recorded on the instrument.

    The string "unknown" is returned when the triple is not present
    in the loaded mappings.
    """
    triple = (bindingKit, sequencingKit, softwareVersion)
    try:
        return _BARCODE_MAPPINGS[triple]
    except KeyError:
        return "unknown"
|