File: mts.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (102 lines) | stat: -rwxr-xr-x 3,974 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/python3

import argparse
import subprocess
import sys
import os
import os.path
import shutil
import yaml

from scripts.common import fill_default_values

#copied from http://stackoverflow.com/questions/431684/how-do-i-cd-in-python/13197763#13197763
class cd:
    """Temporarily switch the process working directory.

    On entry the current directory is remembered and the process moves to
    the requested path (a leading ``~`` is expanded when the object is
    created).  On exit the remembered directory is restored, whether or not
    the body raised.
    """

    def __init__(self, newPath):
        # Expand "~" once, up front; the path is otherwise used as given.
        target = os.path.expanduser(newPath)
        self.newPath = target

    def __enter__(self):
        # Remember where we are *before* moving so __exit__ can go back.
        previous = os.getcwd()
        self.savedPath = previous
        os.chdir(self.newPath)

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Returns None, so an exception raised in the body still propagates.
        os.chdir(self.savedPath)

# Command-line interface.  Each option is declared as a (names, settings) pair
# and registered in the listed order, so the generated --help output keeps the
# same layout.
parser = argparse.ArgumentParser(description="MTS - Metagenomic Time Series")

_cli_options = (
    (("--threads", "-t"), {"type": int, "default": 8, "help": "Number of threads"}),
    (("dir",), {"type": str, "help": "Output directory"}),
    (("--config", "-c"), {"type": str, "default": "", "help": "config.yaml to be copied to the directory (unnecessary if config.yaml is already there)"}),
    (("--reuse-assemblies",), {"type": str, "help": "Directory with existing assemblies to reuse"}),
    (("--reuse-profiles",), {"type": str, "help": "Directory with existing profiles to reuse"}),
    (("--reuse-from",), {"type": str, "help": "Directory with another assembly to reuse everything that is possible (overrides other --reuses)"}),
    (("--no-stats", "-S"), {"action": "store_true", "help": "Skip the stats section (overrides the config value)"}),
    (("--verbose", "-v"), {"action": "store_true", "help": "Increase verbosity level"}),
    (("--dryrun",), {"action": "store_true", "help": "Show tasks, do not execute them"}),
)
for _names, _settings in _cli_options:
    parser.add_argument(*_names, **_settings)

# Parsed once at start-up; the rest of the script reads options from `args`.
args = parser.parse_args()

# Directory that holds this script, with symlinks in the script path resolved.
script_path = os.path.realpath(sys.argv[0])
exec_dir = os.path.dirname(script_path)
# Three levels above the script directory; handed to snakemake as a config value.
LOCAL_DIR = os.path.realpath(os.path.join(exec_dir, "../../../"))

# Arguments shared by every snakemake invocation made by this script.
base_params = [
    "snakemake",
    "--directory", os.path.realpath(args.dir),
    "--cores", str(args.threads),
    "--config", "LOCAL_DIR={}".format(LOCAL_DIR),
]
if args.verbose:
    base_params += ["-p", "--verbose"]
if args.dryrun:
    base_params.append("--dryrun")

# Create the output directory if it is missing.  exist_ok=True replaces the
# separate os.path.exists() test, which could race with another process
# creating the directory between the check and the call.
os.makedirs(args.dir, exist_ok=True)

print("Output folder set to", args.dir)

# The pipeline always reads its configuration from <output dir>/config.yaml.
config_path = os.path.join(args.dir, "config.yaml")
if args.config:
    if os.path.exists(config_path):
        # Never overwrite an existing config.  diff prints the differences
        # itself and returns non-zero when the files differ or cannot be
        # compared, which is treated as a conflict.
        if subprocess.call(["diff", config_path, args.config]):
            print("\033[31mConfig path specified, but different config.yaml already exists in output folder", args.dir, "\033[0m")
            sys.exit(239)
    else:
        print("Copying config from", args.config)
        shutil.copy(args.config, config_path)

# config.yaml is opened unconditionally further down; stop here with a readable
# message (same exit status as the config conflict above) rather than letting
# that open() fail with a FileNotFoundError traceback.
if not os.path.isfile(config_path):
    print("\033[31mNo config.yaml found in output folder", args.dir, "(use --config to provide one)\033[0m")
    sys.exit(239)

# Resolve every user-supplied path while the invocation directory is still the
# working directory.  Inside cd(exec_dir) a relative path would be looked up
# next to this script instead of where the user pointed.
output_dir = os.path.abspath(args.dir)
config_path = os.path.abspath(config_path)

if args.reuse_from:
    # --reuse-from overrides the individual --reuse-* options.
    args.reuse_assemblies = os.path.join(args.reuse_from, "assembly")
    args.reuse_profiles = os.path.join(args.reuse_from, "profile")
if args.reuse_assemblies:
    args.reuse_assemblies = os.path.abspath(args.reuse_assemblies)
if args.reuse_profiles:
    args.reuse_profiles = os.path.abspath(args.reuse_profiles)

# snakemake is run from the script's directory; the extra Snakefiles below are
# named relative to it (presumably the default Snakefile lives there too).
with cd(exec_dir):
    def call_snake(extra_params=None):
        """Run snakemake with the shared base arguments plus *extra_params*.

        extra_params -- optional sequence of additional command-line arguments.
        Raises subprocess.CalledProcessError if snakemake exits non-zero.
        """
        # None instead of a mutable [] default; any sequence is accepted.
        command = base_params + list(extra_params or [])
        subprocess.check_call(command, stdout=sys.stdout, stderr=sys.stderr)

    def reuse_dir(dir_from, dir_name):
        """Symlink an existing results directory into the output folder.

        dir_from -- absolute path of the directory to reuse; falsy means
                    "nothing to reuse" and the call is a no-op.
        dir_name -- name of the link to create inside the output folder.

        Prints a warning and does nothing when the source is not a directory
        or when something already exists at the destination.
        """
        if not dir_from:
            return
        local_dir = os.path.join(output_dir, dir_name)
        if not os.path.isdir(dir_from):
            print("\033[33mWarning: {} source directory doesn't exist\033[0m".format(dir_from))
            return
        # lexists also catches a dangling symlink, which os.path.exists()
        # reports as absent and which would make os.symlink() raise.
        if os.path.lexists(local_dir):
            print("\033[33mWarning: {} destination directory already exists\033[0m".format(dir_name))
            return
        # dir_from is absolute, so the link stays valid regardless of where
        # the output folder is located.
        os.symlink(dir_from, local_dir)

    with open(config_path) as config_in:
        # safe_load works on every PyYAML release; yaml.load() without an
        # explicit Loader warns on PyYAML 5.x and raises TypeError on 6.x.
        # An empty file parses to None, so fall back to an empty mapping.
        config = yaml.safe_load(config_in) or {}
    fill_default_values(config)

    reuse_dir(args.reuse_assemblies, "assembly")
    reuse_dir(args.reuse_profiles, "profile")

    print("Step #1 - Assembly")
    call_snake()

    # "or {}" tolerates a section that is present but empty (null) in the YAML.
    if (config.get("reassembly") or {}).get("enabled", True):
        print("Step #1b - Reassembly")
        call_snake(["--snakefile", "Reassembly.snake"])

    # Stats run only when the section is non-empty and --no-stats was not given.
    if not args.no_stats and config.get("stats"):
        print("Step #2 - Stats")
        call_snake(["--snakefile", "Stats.snake"])