1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
|
#!/usr/bin/python3
import sys, os, signal
import string, re
# Use the default SIGPIPE disposition instead of Python's start-up setting
# (which ignores the signal), for this process and the shell commands it runs.
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

# Usage text placeholder (currently just a newline).
use_message = '''
'''

# True when running on macOS; make_cat_cmd() uses it to pick gzcat over zcat.
osx_mode = sys.platform == 'darwin'
def make_cat_cmd(gzmode, read_dir_base, read_dir, fq_name, num_read):
    """Build a shell pipeline that copies the head of a read file.

    The pipeline reads ``../../<read_dir_base><read_dir>/<fq_name>``, keeps
    the first ``num_read * 4`` lines (four lines per read), and writes the
    result to ``fq_name`` in the current directory.

    Args:
        gzmode: True when the input is gzip-compressed.  The file is then
            decompressed with ``zcat`` (``gzcat`` when ``osx_mode`` is set)
            and the truncated output is re-compressed with ``gzip``.
        read_dir_base: Base directory of the read sets; concatenated as-is,
            so it is expected to end with ``/``.
        read_dir: Name of the read-set directory under ``read_dir_base``.
        fq_name: File name of the input, also used as the output name.
        num_read: Number of reads to keep.

    Returns:
        The command line as a single string, meant to be run by a shell.
    """
    # Local import keeps this function self-contained.
    import shlex

    if gzmode:
        reader = "gzcat" if osx_mode else "zcat"
    else:
        reader = "cat"

    source = "../../%s%s/%s" % (read_dir_base, read_dir, fq_name)

    # Quote the paths so directory names containing spaces or shell
    # metacharacters cannot break (or alter) the command.  Names made only
    # of ordinary path characters are left unchanged by shlex.quote().
    stages = [
        "%s %s" % (reader, shlex.quote(source)),
        "head -n %d" % (num_read * 4),
    ]
    if gzmode:
        stages.append("gzip")

    return "%s > %s" % (" | ".join(stages), shlex.quote(fq_name))
def init():
    """Create per-size working directories for every read set.

    Scans ``../reads/real/`` and, for each entry that holds a read pair
    (``1.fq.gz``/``2.fq.gz``, or else ``1.fq``/``2.fq``) and has no
    same-named entry in the current directory yet, creates
    ``<read_dir>/<size>/`` for each configured size:

    * sized directories (``1M``, ``10M``) receive the first N reads of each
      file, produced by the command from ``make_cat_cmd``;
    * ``whole`` receives symlinks to the original files.

    Every size directory also gets a symlink to
    ``../../calculate_read_cost.py``.  Each command is echoed to stderr
    before it is run.  Size directories that already exist are skipped.
    The function changes the working directory while it runs and steps back
    with ``os.chdir("..")`` after each directory.
    """
    # Local import keeps this function self-contained.
    import shlex

    read_dir_base = "../reads/real/"

    # [directory name, number of reads to keep]; the count is unused for
    # "whole", which links to the complete files instead of copying.
    tests = [
        ["1M", 1000000],
        #["5M", 5000000],
        ["10M", 10000000],
        #["20M", 20000000],
        ["whole", 0],
    ]

    for read_dir in os.listdir(read_dir_base):
        # Anything already present in the working directory is left alone.
        if os.path.exists(read_dir):
            continue

        source_dir = read_dir_base + read_dir
        gz_names = ['1.fq.gz', '2.fq.gz']
        plain_names = ['1.fq', '2.fq']
        if all(os.path.exists(source_dir + "/" + name) for name in gz_names):
            gz_file = True
            fq_names = gz_names
        elif all(os.path.exists(source_dir + "/" + name) for name in plain_names):
            # Both mates are required; the previous check tested 1.fq twice
            # and never looked at 2.fq.
            gz_file = False
            fq_names = plain_names
        else:
            continue

        print("Processing", read_dir, "...", file=sys.stderr)
        os.mkdir(read_dir)
        os.chdir(read_dir)

        for dir_name, num_reads in tests:
            if os.path.exists(dir_name):
                continue
            os.mkdir(dir_name)
            os.chdir(dir_name)

            for fq_name in fq_names:
                if dir_name == "whole":
                    # Link to the full file; quote the path so unusual
                    # directory names survive the shell.
                    source = "../../%s%s/%s" % (read_dir_base, read_dir, fq_name)
                    cmd = "ln -s %s ." % shlex.quote(source)
                else:
                    cmd = make_cat_cmd(gz_file, read_dir_base, read_dir, fq_name, num_reads)
                print(cmd, file=sys.stderr)
                os.system(cmd)

            os.system("ln -s ../../calculate_read_cost.py .")
            os.chdir("..")

        os.chdir("..")
# Run the directory setup only when executed as a script, not when imported.
if __name__ == "__main__":
    init()
|