File: init.py

package info (click to toggle)
hisat2 2.2.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 19,448 kB
  • sloc: cpp: 97,109; python: 11,075; perl: 7,279; sh: 2,328; ansic: 1,458; makefile: 532; javascript: 273; java: 116
file content (98 lines) | stat: -rwxr-xr-x 2,718 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/python3

import sys, os, signal
import string, re

# Restore the default SIGPIPE disposition (Python ignores SIGPIPE by default
# and raises BrokenPipeError instead).
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

# Usage text; currently just a newline.
use_message = '''
'''

# True on macOS; make_cat_cmd uses it to pick "gzcat" instead of "zcat".
osx_mode = sys.platform == 'darwin'

def make_cat_cmd(gzmode, read_dir_base, read_dir, fq_name, num_read):
    """Return a shell pipeline that copies the head of a FASTQ file.

    The pipeline reads ``../../<read_dir_base><read_dir>/<fq_name>``, keeps
    the first ``num_read * 4`` lines with ``head`` and redirects the result
    to ``fq_name`` in the current directory.

    When ``gzmode`` is true the source is decompressed on the way in
    (``gzcat`` if the module-level ``osx_mode`` flag is set, ``zcat``
    otherwise) and re-compressed with ``gzip`` on the way out; when false
    the file is read with plain ``cat``.

    The command is returned as a single string; nothing is executed here.
    """
    # Pick the program that streams the source file to stdout.
    if not gzmode:
        reader = "cat"
    elif osx_mode:
        reader = "gzcat"
    else:
        reader = "zcat"

    source = "../../%s%s/%s" % (read_dir_base, read_dir, fq_name)
    stages = [
        "%s %s" % (reader, source),
        "head -n %d" % (num_read * 4),
    ]
    if gzmode:
        stages.append("gzip")

    return "%s > %s" % (" | ".join(stages), fq_name)


def init():
    """Create down-sampled working copies of every paired-end read set.

    Scans ``../reads/real/`` (relative to the current working directory).
    For each entry that has both mate files (``1.fq.gz`` and ``2.fq.gz``,
    or otherwise ``1.fq`` and ``2.fq``) and for which no same-named path
    exists in the current directory yet, it creates ``<entry>/1M``,
    ``<entry>/10M`` and ``<entry>/whole``:

    * the sized directories receive the leading reads of each mate file,
      produced by the shell pipeline from ``make_cat_cmd``;
    * ``whole`` receives symlinks to the original mate files;
    * every sub-directory also gets a symlink to ``calculate_read_cost.py``.

    Entries lacking a complete pair are skipped.  Commands are run through
    ``os.system`` and their exit status is not checked; the copy and link
    commands for the read files are echoed to stderr before running.
    """
    read_dir_base = "../reads/real/"

    # Sub-directory name and number of reads to keep.  The "whole" entry is
    # handled by name (symlinks), so its count is not used.  Further sizes
    # such as ("5M", 5000000) or ("20M", 20000000) can be added here.
    tests = [
        ("1M", 1000000),
        ("10M", 10000000),
        ("whole", 0),
    ]

    for read_dir in os.listdir(read_dir_base):
        # An existing path with this name means the set was already prepared.
        if os.path.exists(read_dir):
            continue

        src_dir = read_dir_base + read_dir
        if os.path.exists(src_dir + "/1.fq.gz") and \
                os.path.exists(src_dir + "/2.fq.gz"):
            gz_file = True
            fq_names = ("1.fq.gz", "2.fq.gz")
        elif os.path.exists(src_dir + "/1.fq") and \
                os.path.exists(src_dir + "/2.fq"):
            # Both mates are required; the second check previously tested
            # 1.fq again, letting sets without 2.fq through.
            gz_file = False
            fq_names = ("1.fq", "2.fq")
        else:
            continue

        print("Processing", read_dir, "...", file=sys.stderr)

        os.mkdir(read_dir)
        os.chdir(read_dir)

        for dir_name, num_reads in tests:
            if os.path.exists(dir_name):
                continue

            os.mkdir(dir_name)
            os.chdir(dir_name)

            # From here the source files are two levels up, hence "../../".
            for fq_name in fq_names:
                if dir_name == "whole":
                    cmd = "ln -s ../../%s%s/%s ." % (read_dir_base, read_dir, fq_name)
                else:
                    cmd = make_cat_cmd(gz_file, read_dir_base, read_dir, fq_name, num_reads)
                print(cmd, file=sys.stderr)
                os.system(cmd)

            os.system("ln -s ../../calculate_read_cost.py .")
            os.chdir("..")

        os.chdir("..")
    

if __name__ == "__main__":
    init()