File: writeSummaryToCmp.py

package info (click to toggle)
kineticstools 0.6.1%2Bgit20220223.1326a4d%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 22,140 kB
  • sloc: python: 3,503; makefile: 202; ansic: 104; sh: 55; xml: 19
file content (140 lines) | stat: -rwxr-xr-x 4,917 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python

import cProfile
from pbcore.io import GffReader, Gff3Record
import os
import logging
import sys

from pbcore.util.ToolRunner import PBToolRunner

__version__ = "1.0"


class IpdRatioSummaryWriter(PBToolRunner):

    def __init__(self):
        desc = ['Summarizes kinetic modifications in the alignment_summary.gff file',
                'Notes: For all command-line arguments, default values are listed in [].']
        super(IpdRatioSummaryWriter, self).__init__('\n'.join(desc))

        self.parser.add_argument('--pickle',
                                 dest="pickle",
                                 help='Name of input GFF file [%(default)s]')

        self.parser.add_argument('--alignmentSummary',
                                 dest="alignmentSummary",
                                 help='Name alignment summary file [%(default)s]')

        self.parser.add_argument('--outfile',
                                 dest="outfile",
                                 help='Name of modified alignment summary GFF file [%(default)s]')

        self.parser.add_argument("--profile",
                                 action="store_true",
                                 dest="doProfiling",
                                 default=False,
                                 help="Enable Python-level profiling (using cProfile).")

    def getVersion(self):
        return __version__

    def validateArgs(self):
        if not os.path.exists(self.args.modifications):
            self.parser.error(
                'input modifications gff file provided does not exist')

        if not os.path.exists(self.args.alignmentSummary):
            self.parser.error(
                'input alignment summary gff file provided does not exist')

    def run(self):
        self.options = self.args

        # Log generously
        logFormat = '%(asctime)s [%(levelname)s] %(message)s'
        logging.basicConfig(level=logging.INFO, format=logFormat)
        stdOutHandler = logging.StreamHandler(sys.stdout)
        logging.Logger.root.addHandler(stdOutHandler)
        logging.info("t1")

        if self.args.doProfiling:
            cProfile.runctx("self._mainLoop()",
                            globals=globals(),
                            locals=locals(),
                            filename="profile-main4.out")

        else:
            return self._mainLoop()

    def _mainLoop(self):

        # Read in the existing modifications.gff
        modReader = GffReader(self.args.modifications)

        # Set up some additional headers to be injected
        headers = [
            ('source', 'kineticModificationCaller 1.3.1'),
            ('source-commandline', " ".join(sys.argv)),
            ('attribute-description',
             'modsfwd - count of detected DNA modifications on forward strand'),
            ('attribute-description',
             'modsrev - count of detected DNA modifications on reverse strand')
        ]

        # Get modification calls
        hits = [{"pos": x.start, "strand": x.strand}
                for x in modReader if x.type == 'modified_base']

        # Summary reader
        summaryFile = file(self.args.alignmentSummary)

        # Modified gff file
        summaryWriter = file(self.args.outfile, "w")

        self.seqMap = {}
        inHeader = True

        # Loop through
        for line in summaryFile:
            # Pass any metadata line straight through
            if line[0] == "#":

                # Parse headers
                splitFields = line.replace('#', '').split(' ')
                field = splitFields[0]
                value = " ".join(splitFields[1:])
                if field == 'sequence-header':
                    [internalTag, delim, externalTag] = value.strip().partition(' ')
                    self.seqMap[internalTag] = externalTag
                print(line.strip(), file=summaryWriter)
                continue

            if inHeader:
                # We are at the end of the header -- write the tool-specific
                # headers
                for field in headers:
                    print(("##%s %s" % field), file=summaryWriter)
                inHeader = False

            # Parse the line
            rec = Gff3Record.fromString(line)

            if rec.type == 'region':
                # Get the hits in this interval, add them to the gff record
                intervalHits = [
                    h for h in hits if rec.start <= h['pos'] <= rec.end]
                strand0Hits = len(
                    [h for h in intervalHits if h['strand'] == '+'])
                strand1Hits = len(
                    [h for h in intervalHits if h['strand'] == '-'])

                rec.modsfwd = strand0Hits
                rec.modsrev = strand1Hits

                print(str(rec), file=summaryWriter)


if __name__ == "__main__":
    kt = ModificationSummary()
    kt.start()