File: AssembleSegment.py

package info (click to toggle)
shasta 0.14.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 29,636 kB
  • sloc: cpp: 82,262; python: 2,348; makefile: 222; sh: 143
file content (41 lines) | stat: -rwxr-xr-x 1,234 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/python3

import shasta
import GetConfig
import argparse

# The only command line argument is the id of the edge to process
# (an edge id is the same thing as a segment id).
parser = argparse.ArgumentParser(
    description='Write to a csv file assembly details for a single segment.')
parser.add_argument('edgeId', type=int)
arguments = parser.parse_args()
edgeId = arguments.edgeId


# Load the configuration via the GetConfig helper module.
config = GetConfig.getConfig()

# Instantiate the Assembler and set up its consensus caller using the
# 'consensusCaller' entry of the 'Assembly' section of the configuration.
a = shasta.Assembler()
consensusCallerName = config['Assembly']['consensusCaller']
a.setupConsensusCaller(consensusCallerName)

# Access what we need. The methods are called one at a time,
# in the order listed here, before the segment is assembled.
accessMethodNames = (
    'accessKmers',
    'accessMarkers',
    'accessMarkerGraphVertices',
    'accessMarkerGraphEdges',
    'accessAssemblyGraphEdgeLists',
    'accessMarkerGraphConsensus',
    'accessMarkerGraphCoverageData',
)
for accessMethodName in accessMethodNames:
    getattr(a, accessMethodName)()

# Assemble the requested assembly graph edge (segment).
assembledSegment = a.assembleAssemblyGraphEdge(edgeId)

# Write the assembly details to <edgeId>.csv in the current directory.
# There is one line per position of the assembled segment, containing:
#   position, base, repeat count,
# followed by one field per coverage data entry for that position, formatted
# as "<base><repeatCount><strand> <frequency>".
# Every field, including the last one, is followed by a comma, so each
# line ends with a trailing comma.
#
# The "with" statement guarantees that the file is flushed and closed when
# the loop finishes, and also if an exception is raised part way through.
# The file object is named csvFile to avoid shadowing the name of the
# standard library csv module.
with open(str(edgeId) + '.csv', 'w') as csvFile:
    for position in range(assembledSegment.size()):
        coverageData = assembledSegment.getCoverageData(position)

        # The three fixed fields for this position.
        fields = [
            '%i' % position,
            '%s' % assembledSegment.getBase(position),
            '%i' % assembledSegment.getRepeatCount(position),
        ]

        # One additional field per coverage data entry.
        for cd in coverageData:
            fields.append('%s%i%s %i' % (
                cd.getBase(),
                cd.getRepeatCount(),
                cd.getStrand(),
                cd.getFrequency()))

        # Terminate every field with a comma, then end the line.
        csvFile.write(''.join(field + ',' for field in fields) + '\n')