File: check_sorting.py

package info (click to toggle)
lumpy-sv 0.3.1%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 296,072 kB
  • sloc: cpp: 9,908; python: 1,768; sh: 1,384; makefile: 365; ansic: 322; perl: 58
file content (66 lines) | stat: -rwxr-xr-x 1,768 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/local/bin/python
import sys
import subprocess
import os
import numpy as np

if len(sys.argv) < 2:
    print('usage:' + sys.argv[0] + ' <bam 1> <bam 2> <..>')
    exit(1)

order = []


for i in range(1,len(sys.argv)):
    bam_file = sys.argv[i]
    print(bam_file)

    p = subprocess.Popen(\
            ['samtools', 'view', '-H', bam_file], \
            stdout=subprocess.PIPE)

    for l in p.communicate()[0].split('\n'):
        if '@SQ' in l:
            A = l.rstrip().split('\t')
            name = ''
            for a in A:
                if 'SN' in a:
                    name = a.split(':')[1]
            order.append(name)


    p = subprocess.Popen(\
            'samtools view ' +  bam_file + \
            ' | cut -f3,4 | head -n 100000', \
            shell=True, \
            stdout=subprocess.PIPE)

    broke = False
    curr_chrom_index = -1
    curr_pos = -1
    for l in p.communicate()[0].split('\n'):
        if len(l) > 0 :
            a = l.split('\t')
            chrom = a[0]
            pos = int(a[1])

            if order.index(chrom) > curr_chrom_index:
                curr_chrom_index = order.index(chrom)
                curr_pos = -1
            elif order.index(chrom) < curr_chrom_index:
                print('out of order:\t' + l + '\toccurred after\t' + \
                        order[curr_chrom_index] + '\t' + str(curr_pos))
                broke = True
                break

            if pos > curr_pos:
                curr_pos = pos
            elif pos < curr_pos:
                print('out of order:\t' + l + '\toccurred after\t' + \
                        order[curr_chrom_index] + '\t' + str(curr_pos))
                broke = True
                break
    if not broke:
        print("in order")