File: Mode2Assembly-A.py

package info (click to toggle)
shasta 0.14.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 29,636 kB
  • sloc: cpp: 82,262; python: 2,348; makefile: 222; sh: 143
file content (105 lines) | stat: -rw-r--r-- 4,291 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/python3

"""

This runs a portion of Mode 2 assembly.
It starts at read graph creation, so the alignments must be available.
It then creates the marker graph using the Mode 1/2 protocol
(strict edge creation + creation of secondary edges).
Finally, it assembles all of the marker graph vertices and edges.

"""

import shasta
import GetConfig

import ast
import os

# Read the config file.
config = GetConfig.getConfig()

# Initialize the assembler and access what we need.
a = shasta.Assembler()
a.accessKmers()
a.accessMarkers()
a.accessAlignmentDataReadWrite()
a.accessCompressedAlignments()
a.setupConsensusCaller(config['Assembly']['consensusCaller'])

# Create the read graph.
readGraphCreationMethod = int(config['ReadGraph']['creationMethod'])
if readGraphCreationMethod == 0:
    a.createReadGraph(
        maxAlignmentCount = int(config['ReadGraph']['maxAlignmentCount']))

elif readGraphCreationMethod == 2:
    a.createReadGraph2(
        maxAlignmentCount = int(config['ReadGraph']['maxAlignmentCount']),
        markerCountPercentile = float(config['ReadGraph']['markerCountPercentile']),
        alignedFractionPercentile = float(config['ReadGraph']['alignedFractionPercentile']),
        maxSkipPercentile = float(config['ReadGraph']['maxSkipPercentile']),
        maxDriftPercentile = float(config['ReadGraph']['maxDriftPercentile']),
        maxTrimPercentile = float(config['ReadGraph']['maxTrimPercentile']))
else:
    raise ValueError('Invalid value for --ReadGraph.creationMethod.')
    
strandSeparationMethod = int(config['ReadGraph']['strandSeparationMethod'])

# Limited strand separation in the read graph.
if strandSeparationMethod==1:
    a.flagCrossStrandReadGraphEdges1(
        maxDistance = int(config['ReadGraph']['crossStrandMaxDistance']))

a.flagChimericReads(
    maxChimericReadDistance = int(config['ReadGraph']['maxChimericReadDistance']))

# Flag inconsistent alignments, if requested.
flagInconsistentAlignments = ast.literal_eval(config['ReadGraph']['flagInconsistentAlignments'])
if flagInconsistentAlignments:
    a.flagInconsistentAlignments(
        triangleErrorThreshold = config['ReadGraph']['flagInconsistentAlignmentsTriangleErrorThreshold'],
        leastSquareErrorThreshold = config['ReadGraph']['flagInconsistentAlignmentsLeastSquareErrorThreshold'],
        leastSquareMaxDistance = config['ReadGraph']['flagInconsistentAlignmentsLeastSquareMaxDistance'])

# Strict strand separation in the read graph.
if strandSeparationMethod == 2:
    a.flagCrossStrandReadGraphEdges2()

# Compute connected components of the read graph.
# These are currently not used.
if not strandSeparationMethod == 2:
    a.computeReadGraphConnectedComponents()

# Create the marker graph using strict edge creation.    
a.createMarkerGraphVertices(
    minCoverage = int(config['MarkerGraph']['minCoverage']),
    maxCoverage = int(config['MarkerGraph']['maxCoverage']),
    minCoveragePerStrand = int(config['MarkerGraph']['minCoveragePerStrand']),
    allowDuplicateMarkers = ast.literal_eval(config['MarkerGraph']['allowDuplicateMarkers']),
    peakFinderMinAreaFraction = float(config['MarkerGraph']['peakFinder.minAreaFraction']),
    peakFinderAreaStartIndex = int(config['MarkerGraph']['peakFinder.areaStartIndex']))    
a.findMarkerGraphReverseComplementVertices()
a.createMarkerGraphEdgesStrict(
    minEdgeCoverage = int(config['MarkerGraph']['minEdgeCoverage']),
    minEdgeCoveragePerStrand = int(config['MarkerGraph']['minEdgeCoveragePerStrand']))
a.findMarkerGraphReverseComplementEdges()
a.computeMarkerGraphCoverageHistogram()

# Add secondary edges.
a.createMarkerGraphSecondaryEdges(
    secondaryEdgeMaxSkip = int(config['MarkerGraph']['secondaryEdges.maxSkip']))
a.splitMarkerGraphSecondaryEdges(
    errorRateThreshold = float(config['MarkerGraph']['secondaryEdges.split.errorRateThreshold']),
    minCoverage = int(config['MarkerGraph']['secondaryEdges.split.minCoverage']))

# Assembler all marker graph vertices and edges.
a.assembleMarkerGraphVertices()
a.assembleMarkerGraphEdges(
    markerGraphEdgeLengthThresholdForConsensus =
    int(config['Assembly']['markerGraphEdgeLengthThresholdForConsensus']),
    storeCoverageData = ast.literal_eval(config['Assembly']['storeCoverageData']),
    assembleAllEdges = True)