File: bamToFastq.py

package info (click to toggle)
pbsuite 15.8.24%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 14,512 kB
  • sloc: python: 10,962; sh: 147; xml: 21; makefile: 14
file content (37 lines) | stat: -rwxr-xr-x 1,534 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/python
import pysam, sys, argparse
from pbsuite.utils.FileHandlers import revComp

USAGE="Use pysam to extract fastq sequences from a bam"

parser = argparse.ArgumentParser(description=USAGE, \
        formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("BAM", help="input bam file")
parser.add_argument("POS", default=None, nargs="?", \
                    help="`samtools view` format of the location to grab reads from (optional)")
parser.add_argument("-f", "--flag", default=None,
                    help="Only print reads with flags (comma sep)")
parser.add_argument("-F", "--Flag", default=None,
                    help="Only print reads WITHOUT flags (comma sep)")
args = parser.parse_args()
args.flag = [int(x) for x in args.flag.split(',')] if args.flag is not None else []
args.Flag = [int(x) for x in args.Flag.split(',')] if args.Flag is not None else []

s = pysam.Samfile(args.BAM)

if args.POS is not None:
    chrom, extra = args.POS.split(':')
    start, end = [int(x) for x in extra.split('-')]
    get = s.fetch(reference=chrom, start=start, end=end)
else:
    get = s

for read in get:
    if [read.flag & int(x) for x in args.flag].count(False) \
       or  [read.flag & int(x) for x in args.Flag].count(True):
        continue
        
    if read.is_reverse:
        sys.stdout.write("@{0}\n{1}\n+\n{2}\n".format(read.qname, read.seq.translate(revComp)[::-1], read.qual[::-1]))
    else:
        sys.stdout.write("@{0}\n{1}\n+\n{2}\n".format(read.qname, read.seq, read.qual))