File: py_kmc_dump.py

package info (click to toggle)
kmc 3.1.1%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,376 kB
  • sloc: cpp: 33,006; python: 372; perl: 178; makefile: 135; sh: 34
file content (65 lines) | stat: -rw-r--r-- 2,126 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3
import sys
import py_kmc_api as pka
import argparse
import textwrap

VER = "3.1.1"
DATE = "2019-05-19"

def GeneralHelp():
    """Print the one-line tool banner containing the version and its date."""
    banner = 'KMC dump ver. {} ({})'.format(VER, DATE)
    print(banner)

class VersionHelpAction(argparse.Action):
    """argparse action that prints the banner and the help text, then exits."""

    def __call__(self, parser, namespace, values, option_string=None):
        """Show banner and help, then leave via ``parser.exit()``.

        ``namespace``, ``values`` and ``option_string`` are accepted because
        argparse passes them to every action; they are not used here.
        """
        # Banner first so the version line precedes the generated help.
        GeneralHelp()
        parser.print_help()
        # parser.exit() terminates the program; later arguments are never parsed.
        parser.exit()

class MyParamsParser(argparse.ArgumentParser):
    """ArgumentParser whose usage errors also print the banner and full help."""

    def error(self, message):
        """Report ``message`` on stderr, print banner and help, exit with 1."""
        error_line = 'error: %s\n' % message
        sys.stderr.write(error_line)
        GeneralHelp()
        self.print_help()
        # Same effect as sys.exit(1): terminate with a non-zero status.
        raise SystemExit(1)

# Command-line interface.
# The automatic -h/--help option is disabled (add_help=False) so that both
# --help and --version can be routed through VersionHelpAction, which prints
# the version banner before the help text.
parser = MyParamsParser(add_help=False)
parser.register('action', 'version_help_action', VersionHelpAction)


group1 = parser.add_argument_group('help and version')
group1.add_argument("--version", action='version_help_action', nargs=0, help="print help and version")
group1.add_argument("-h", "--help", action='version_help_action', nargs=0, help="print help and version")

group2 = parser.add_argument_group('normal run')
group2.add_argument("kmc_database", help="kmc database")
group2.add_argument("output_file", help="output file")
# Both cutoffs default to 0; the script only applies a cutoff that is > 0.
group2.add_argument("-ci", "--cutoff_min", type=int, help="exclude k-mers occurring less than CI times", default=0)
group2.add_argument("-cx", "--cutoff_max", type=int, help="exclude k-mers occurring more than CX times", default=0)

args = parser.parse_args()

# Dump every k-mer of the database to the output file as "<k-mer>\t<count>".

# Open the KMC database for listing; failures are reported on stderr, the
# same stream MyParamsParser.error uses for argument errors.
kmer_data_base = pka.KMCFile()
if not kmer_data_base.OpenForListing(args.kmc_database):
    print("Error: cannot open kmc database", file=sys.stderr)
    sys.exit(1)

# The k-mer holder is sized from the k-mer length stored in the database info.
info = kmer_data_base.Info()
kmer_object = pka.KmerAPI(info.kmer_length)

# A cutoff of 0 (the argparse default) or below is not applied.
if args.cutoff_min > 0:
    if not kmer_data_base.SetMinCount(args.cutoff_min):
        print("Error: cannot set cutoff min", file=sys.stderr)
        sys.exit(1)

if args.cutoff_max > 0:
    if not kmer_data_base.SetMaxCount(args.cutoff_max):
        print("Error: cannot set cutoff max", file=sys.stderr)
        sys.exit(1)

# The context manager closes the output file even if reading a k-mer or
# writing a line raises part-way through the dump.
with open(args.output_file, 'w') as output_file:
    counter = pka.Count()
    # ReadNextKmer fills kmer_object and counter; the loop ends when it
    # returns a false value.
    while kmer_data_base.ReadNextKmer(kmer_object, counter):
        output_file.write("{}\t{}\n".format(kmer_object, counter.value))