File: get_read_ends.py

package info (click to toggle)
nanofilt 2.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 62,600 kB
  • sloc: python: 261; sh: 34; makefile: 11
file content (23 lines) | stat: -rw-r--r-- 674 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from argparse import ArgumentParser
from Bio import SeqIO
import gzip


def main():
    """Print the trailing bases of every read in a gzipped FASTQ file.

    Parses the command line with ``get_args``, iterates over the records
    of the gzip compressed FASTQ input and prints, for each record, the
    fragment made of its last ``--bases_from_end`` bases to standard
    output in FASTQ format.
    """
    args = get_args()
    n_bases = args.bases_from_end
    # Context manager: the gzip handle is closed even if parsing fails
    # part-way through the file (the original never closed it).
    with gzip.open(args.fastq, 'rt') as handle:
        for record in SeqIO.parse(handle, "fastq"):
            # ``record[-0:]`` is the *whole* read because -0 == 0, so a
            # non-positive length is mapped explicitly to an empty fragment.
            tail = record[-n_bases:] if n_bases > 0 else record[:0]
            print(tail.format("fastq"), end="")


def get_args():
    """Parse the command-line arguments of the script.

    Returns:
        argparse.Namespace with two attributes:
        ``fastq`` -- the positional input path, and
        ``bases_from_end`` -- an int taken from ``--bases_from_end``
        (100 when the option is not given).
    """
    # The description previously read "Filter nanopore data based on time",
    # which does not describe this script; --help now states what it does.
    parser = ArgumentParser(
        description="Extract the last N bases of each read "
                    "from a gzip compressed fastq file")
    parser.add_argument("fastq", help="input gzip compressed fastq file")
    parser.add_argument("--bases_from_end",
                        help="get a fragment of each read N bp from end",
                        default=100,
                        type=int)
    return parser.parse_args()


# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()