File: chemistry.py

package info (click to toggle)
python-pbcore 1.6.5%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 19,168 kB
  • sloc: python: 25,497; xml: 2,846; makefile: 251; sh: 24
file content (68 lines) | stat: -rw-r--r-- 3,085 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from __future__ import absolute_import

# Names exported when this module is star-imported.
__all__ = ["tripleFromMetadataXML",
           "decodeTriple",
           "ChemistryLookupError" ]

import xml.etree.ElementTree as ET, os.path
from pkg_resources import Requirement, resource_filename
from collections import OrderedDict

class ChemistryLookupError(Exception):
    """
    Raised when chemistry information cannot be loaded from a mapping
    XML file or extracted from a metadata XML file.
    """

def _loadBarcodeMappingsFromFile(mapFile):
    """
    Parse a chemistry mapping XML file into an ordered lookup table.

    Every "Mapping" child of the document root is expected to carry
    "BindingKit", "SequencingKit", "SoftwareVersion" and
    "SequencingChemistry" children.  The text of the first three forms
    the key of an entry; the text of the last one is its value.

    :param mapFile: file name (or file object) handed to ElementTree.parse
    :return: OrderedDict mapping (bindingKit, sequencingKit,
             softwareVersion) tuples to the sequencing chemistry text
    :raises ChemistryLookupError: if the file cannot be read or parsed, or
             if a Mapping element lacks one of the expected children
    """
    try:
        root = ET.parse(mapFile).getroot()
        mappings = OrderedDict()
        for mapElement in root.findall("Mapping"):
            # find() returns None for a missing child, so the .text access
            # below fails and is reported as a lookup error by the handler.
            bindingKit          = mapElement.find("BindingKit").text
            sequencingKit       = mapElement.find("SequencingKit").text
            softwareVersion     = mapElement.find("SoftwareVersion").text
            sequencingChemistry = mapElement.find("SequencingChemistry").text
            mappings[(bindingKit, sequencingKit, softwareVersion)] = sequencingChemistry
        return mappings
    except Exception:
        # Deliberately not a bare "except:": KeyboardInterrupt and SystemExit
        # must propagate instead of being disguised as a lookup failure.
        raise ChemistryLookupError("Error loading chemistry mapping xml")

def _loadBarcodeMappings():
    """
    Build the chemistry mapping table.

    The mapping.xml resource packaged with pbcore is loaded first.  When
    the SMRT_CHEMISTRY_BUNDLE_DIR environment variable is set to a
    non-empty value, the chemistry.xml in that directory is loaded as
    well and its entries are merged over the packaged ones.
    """
    packagedXml = resource_filename(Requirement.parse('pbcore'),
                                    'pbcore/chemistry/resources/mapping.xml')
    table = _loadBarcodeMappingsFromFile(packagedXml)

    bundleDir = os.getenv("SMRT_CHEMISTRY_BUNDLE_DIR")
    if not bundleDir:
        # No override directory configured: the packaged table is final.
        return table

    import logging
    message = "Loading updated chemistry mapping XML from {}".format(bundleDir)
    logging.info(message)
    overrides = _loadBarcodeMappingsFromFile(os.path.join(bundleDir, 'chemistry.xml'))
    table.update(overrides)
    return table

# Lookup table consulted by decodeTriple.  It is built once, when this module
# is imported, so a failure to load the mapping XML surfaces at import time.
_BARCODE_MAPPINGS = _loadBarcodeMappings()

def tripleFromMetadataXML(metadataXmlPath):
    """
    Scrape the chemistry triple from a metadata.xml file.

    :param metadataXmlPath: file name (or file object) handed to
                            ElementTree.parse
    :return: tuple (bindingKit, sequencingKit, instrumentControlVersion),
             where the first two are the PartNumber texts of the BindingKit
             and SequencingKit elements and the last is the InstCtrlVer
             text truncated to its first two dot-delimited components
    :raises ChemistryLookupError: if the file cannot be read or parsed, or
             if any of the three elements is missing
    """
    # Only the "pb" prefix is used by the paths below.  A None key must not
    # be added to this dict: on Python 3 ElementTree sorts the namespace
    # items, and ordering None against a string raises TypeError, which made
    # every lookup fail.
    nsd = {"pb": "http://pacificbiosciences.com/PAP/Metadata.xsd"}
    try:
        tree = ET.parse(metadataXmlPath)
        root = tree.getroot()
        bindingKit = root.find("pb:BindingKit/pb:PartNumber", namespaces=nsd).text
        sequencingKit = root.find("pb:SequencingKit/pb:PartNumber", namespaces=nsd).text
        # The instrument version is truncated to the first 2 dot delimited components
        instrumentControlVersion = root.find("pb:InstCtrlVer", namespaces=nsd).text
        verComponents = instrumentControlVersion.split(".")[0:2]
        instrumentControlVersion = ".".join(verComponents)
        return (bindingKit, sequencingKit, instrumentControlVersion)
    except Exception:
        # A missing element makes find() return None, so the .text access
        # fails; that and any read/parse error are reported uniformly.
        raise ChemistryLookupError("Could not find, or extract chemistry information from, %s" % (metadataXmlPath,))

def decodeTriple(bindingKit, sequencingKit, softwareVersion):
    """
    Look up the name of the chemistry configuration for the given
    (bindingKit, sequencingKit, softwareVersion) triple in the loaded
    mapping table.  Returns "unknown" when the triple has no entry.
    """
    triple = (bindingKit, sequencingKit, softwareVersion)
    try:
        return _BARCODE_MAPPINGS[triple]
    except KeyError:
        return "unknown"