File: chemistry.py

package info (click to toggle)
python-pbcore 2.1.2%2Bdfsg-12
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 6,552 kB
  • sloc: python: 13,404; xml: 2,504; makefile: 225; sh: 66
file content (92 lines) | stat: -rw-r--r-- 3,421 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Public API of this module: the names exported by a star-import.
__all__ = ["tripleFromMetadataXML",
           "decodeTriple",
           "ChemistryLookupError"]

from collections import OrderedDict
import xml.etree.ElementTree as ET
import os.path

from importlib import resources


class ChemistryLookupError(Exception):
    """
    Raised when chemistry information cannot be loaded from a mapping
    XML file or extracted from a metadata XML file.
    """


def _loadBarcodeMappingsFromFile(mapFile):
    """
    Parse a chemistry mapping XML file into a lookup table.

    Each "Mapping" element directly under the document root must have
    "BindingKit", "SequencingKit", "SoftwareVersion" and
    "SequencingChemistry" children.

    mapFile: the path (or file object) handed to ElementTree's parse().

    Returns an OrderedDict, in document order, keyed by the tuple
    (bindingKit, sequencingKit, softwareVersion) and holding the
    sequencing chemistry name as value.

    Raises ChemistryLookupError if the file cannot be read or parsed,
    or if a mapping lacks one of the required children; the underlying
    exception is attached as the cause.
    """
    try:
        root = ET.parse(mapFile).getroot()
        mappings = OrderedDict()
        for mapElement in root.findall("Mapping"):
            # find() returns None for a missing child, so ".text" raises
            # and the whole file is rejected below.
            bindingKit = mapElement.find("BindingKit").text
            sequencingKit = mapElement.find("SequencingKit").text
            softwareVersion = mapElement.find("SoftwareVersion").text
            sequencingChemistry = mapElement.find("SequencingChemistry").text
            mappings[(bindingKit, sequencingKit, softwareVersion)
                     ] = sequencingChemistry
        return mappings
    except Exception as e:
        # Keep the original failure reachable through __cause__.
        raise ChemistryLookupError(
            "Error loading chemistry mapping xml") from e


def _loadBarcodeMappings():
    """
    Build the chemistry lookup table used by this module.

    The mapping.xml packaged with pbcore is loaded first, located via
    importlib.resources; if that fails for any reason, the copy in the
    "resources" directory next to this file is used instead.

    When the SMRT_CHEMISTRY_BUNDLE_DIR environment variable is set, the
    chemistry.xml found in that directory is loaded as well and its
    entries are merged on top of the packaged ones.

    Returns the mapping produced by _loadBarcodeMappingsFromFile.

    Raises ChemistryLookupError if the fallback file or the bundle file
    cannot be loaded.
    """
    try:
        mappingFnameContext = resources.as_file(
            resources.files('pbcore') /
            'chemistry/resources/mapping.xml')
        with mappingFnameContext as mappingFname:
            mappings = _loadBarcodeMappingsFromFile(mappingFname)
    except Exception:
        # Deliberately broad: any failure of the packaged-resource lookup
        # falls back to the on-disk copy.  Unlike a bare "except:", this
        # lets KeyboardInterrupt and SystemExit propagate.
        mappingFname = os.path.join(os.path.dirname(__file__),
                                    'resources/mapping.xml')
        mappings = _loadBarcodeMappingsFromFile(mappingFname)

    updMappingDir = os.getenv("SMRT_CHEMISTRY_BUNDLE_DIR")
    if updMappingDir:
        import logging
        logging.info(
            "Loading updated chemistry mapping XML from %s", updMappingDir)
        mappings.update(_loadBarcodeMappingsFromFile(
            os.path.join(updMappingDir, 'chemistry.xml')))
    return mappings


# Lookup table built once, when this module is imported; decodeTriple reads
# from it.  A mapping XML that cannot be loaded therefore raises
# ChemistryLookupError at import time.
_BARCODE_MAPPINGS = _loadBarcodeMappings()


def tripleFromMetadataXML(metadataXmlPath):
    """
    Scrape the chemistry triple from a metadata.xml file.

    metadataXmlPath: the path handed to ElementTree's parse().

    Returns the tuple (bindingKit, sequencingKit, instrumentControlVersion)
    where the two kits are the PartNumber texts and the version is the
    InstCtrlVer text truncated to its first two dot-delimited components.

    Raises ChemistryLookupError if the file or the relevant contents
    are not found; the underlying exception is attached as the cause.
    """
    # Only the "pb" prefix is registered, since every path below is
    # prefixed.  A None key is not a prefix xml.etree.ElementTree
    # understands, and mixing it with string keys makes find() raise
    # TypeError when it sorts the namespace map.
    nsd = {"pb": "http://pacificbiosciences.com/PAP/Metadata.xsd"}
    try:
        root = ET.parse(metadataXmlPath).getroot()
        bindingKit = root.find(
            "pb:BindingKit/pb:PartNumber", namespaces=nsd).text
        sequencingKit = root.find(
            "pb:SequencingKit/pb:PartNumber", namespaces=nsd).text
        # The instrument version is truncated to the first 2 dot delimited components
        fullVersion = root.find("pb:InstCtrlVer", namespaces=nsd).text
        instrumentControlVersion = ".".join(fullVersion.split(".")[0:2])
        return (bindingKit, sequencingKit, instrumentControlVersion)
    except Exception as e:
        raise ChemistryLookupError(
            "Could not find, or extract chemistry information from, %s" % (metadataXmlPath,)) from e


def decodeTriple(bindingKit, sequencingKit, softwareVersion):
    """
    Look up the chemistry configuration name for the triple recorded
    on the instrument; triples with no entry in the mapping table
    yield the string "unknown".
    """
    triple = (bindingKit, sequencingKit, softwareVersion)
    try:
        return _BARCODE_MAPPINGS[triple]
    except KeyError:
        return "unknown"