File: dump_per_read_statistics.py

package info (click to toggle)
tombo 1.5.1-8
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 25,140 kB
  • sloc: python: 12,433; sh: 613; makefile: 16
file content (50 lines) | stat: -rwxr-xr-x 1,827 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#! /usr/bin/env python3
"""Dump per-read statistics

This script takes a per-read statistics file and dumps its contents out into a tab-separated values file. The columns in the file are 'chrm', 'pos', 'strand', 'read_id' and 'stat'.
"""
from tombo import tombo_stats
from tombo.tombo_helper import intervalData
import argparse
import sys
import os

def extract_per_read_stats(input_file, output_file):
    """Dump per-read statistics to tab-separated values"""
    if not os.path.isfile(input_file):
        sys.exit('"{}" is not a valid file'.format(input_file))

    pr_stats = tombo_stats.PerReadStats(input_file)

    with open(output_file, 'w') as out_fp:
        out_fp.write('{}\t{}\t{}\t{}\t{}\n'.format(
            'chrm', 'pos', 'strand', 'read_id', 'stat'))
        for (chrm, strand), cs_blocks in list(pr_stats.blocks_index.items()):
            for start, block_name in list(cs_blocks.items()):
                for pos, stat, read_id in pr_stats.get_region_per_read_stats(
                        intervalData(chrm, start, start + pr_stats.region_size,
                                    strand)):
                    out_fp.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        chrm, pos, strand, read_id, stat))


def main(input_file, output_file):
    extract_per_read_stats(input_file, output_file)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'input_file',
        type=str,
        help="the per-read statistics file produced by tombo"
    )
    parser.add_argument(
        '-o',
        '--output_file',
        default='per_read_stats.txt',
        type=str,
        help="the name of the output tsv file (default: 'per_read_stats.txt')"
    )
    args = parser.parse_args()
    main(args.input_file, args.output_file)