File: parse.py

package info (click to toggle)
python-seqcluster 1.2.9%2Bds-3
  • links: PTS, VCS
  • area: contrib
  • in suites: bookworm
  • size: 113,624 kB
  • sloc: python: 5,308; makefile: 184; sh: 122; javascript: 55
file content (224 lines) | stat: -rw-r--r-- 10,407 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
from __future__ import print_function
import argparse
import sys

from seqcluster import __version__

def parse_cl(in_args):
    """Parse the seqcluster command line and identify the sub-command.

    Only the sub-parser matching ``in_args[0]`` is registered, so the
    returned namespace holds the options of that single sub-command.

    :param in_args: sequence of command-line tokens without the program
        name (i.e. what ``sys.argv[1:]`` would contain).
    :returns: dict holding the parsed namespace under ``"args"`` plus the
        selected sub-command name as an extra key mapped to ``True``.
    :raises SystemExit: when no arguments are given (usage and version are
        printed), when ``--version`` is requested, or when argparse rejects
        the arguments.
    """
    # NOTE(review): looks like a leftover debug print; kept so that the
    # program output does not change.
    print(in_args)
    sub_cmds = {"prepare": add_subparser_prepare,
                "cluster": add_subparser_cluster,
                "seqbuster": add_subparser_mirbuster,
                "report": add_subparser_report,
                "target": add_subparser_target,
                "predict": add_subparser_predict,
                "explore": add_subparser_explore,
                "collapse": add_subparser_collapse,
                "simulator": add_subparser_simulator,
                "stats": add_subparser_stats}
    parser = argparse.ArgumentParser(description="small RNA analysis")
    parser.add_argument("--version", action="store_true", help="show version.")

    # No arguments at all: show the available commands and the version.
    if not in_args:
        print("use %s" % sub_cmds.keys())
        print("seqcluster %s" % __version__)
        sys.exit(0)

    sub_cmd = in_args[0]
    if sub_cmd not in sub_cmds:
        # Parse the tokens we were handed rather than whatever sys.argv
        # contains, so the function behaves the same when called from code.
        args = parser.parse_args(in_args)
        if args.version:
            print("seqcluster %s" % __version__)
            sys.exit(0)
        # Explicit error instead of an ``assert`` (asserts vanish with -O).
        parser.error("no valid sub-command given; use one of: %s"
                     % ", ".join(sub_cmds))

    # Register only the requested sub-command, then parse in_args with it.
    subparsers = parser.add_subparsers(help="seqcluster supplemental commands")
    sub_cmds[sub_cmd](subparsers)
    args = parser.parse_args(in_args)
    return {"args": args, sub_cmd: True}


def _add_debug_option(parser):
    """Attach the two boolean debug flags to ``parser`` and return it.

    Adds ``-d/--debug`` (stored as ``debug``) and ``-vd/--print_debug``;
    both are ``store_true`` switches defaulting to ``False``.
    """
    flag_table = (
        (("-d", "--debug"),
         {"dest": "debug", "help": "max verbosity mode"}),
        (("-vd", "--print_debug"),
         {"help": "print debug messageson terminal"}),
    )
    for option_strings, extra in flag_table:
        parser.add_argument(*option_strings, action="store_true",
                            default=False, **extra)
    return parser


def add_subparser_report(subparsers):
    """Register the ``report`` sub-command on ``subparsers``.

    Declares three mandatory options (json, out, ref), the ``--razer``
    switch and the shared debug flags, then returns the new sub-parser.
    """
    report = subparsers.add_parser("report", help="report data")
    mandatory = (
        ("-j", "--json", "json", "json file from seqcluster"),
        ("-o", "--out", "out", "dir of output files"),
        ("-r", "--reference", "ref", "reference fasta file with index"),
    )
    for short_flag, long_flag, target, text in mandatory:
        report.add_argument(short_flag, long_flag, dest=target,
                            required=True, help=text)
    report.add_argument("--razer", help="map sequences with razer3",
                        action="store_true")
    return _add_debug_option(report)


def add_subparser_mirbuster(subparsers):
    """Register the ``seqbuster`` sub-command on ``subparsers``.

    Takes any number of positional ``files`` plus output, species and
    optional database options, and the shared debug flags. Returns the
    new sub-parser.
    """
    buster = subparsers.add_parser("seqbuster", help="realign miRNA BAM file")
    buster.add_argument("files", help="Bam files.", nargs="*")
    buster.add_argument("-o", "--out", help="dir of output files",
                        required=True, dest="out")
    buster.add_argument("--sps", help="species", required=True)
    plain_options = (
        ("--hairpin", "hairpin.fa"),
        ("--gtf", "gtf file with precursor position to genome."),
        ("--mirna", "miRNA.str"),
    )
    for flag, text in plain_options:
        buster.add_argument(flag, help=text)
    # NOTE(review): store_true combined with default=True means this value
    # is True whether or not the flag is given -- confirm that is intended.
    buster.add_argument("--miraligner", default=True, action="store_true",
                        help="align with JAVA version.")
    buster.add_argument("--collapse", default=False, action="store_true",
                        help="collapse reads into unique sequences.")
    return _add_debug_option(buster)


def add_subparser_explore(subparsers):
    """Register the ``explore`` sub-command on ``subparsers``.

    All four options (json, names, ref, out) are mandatory; the shared
    debug flags are appended last. Returns the new sub-parser.
    """
    explore = subparsers.add_parser("explore", help="explore data")
    mandatory = (
        ("-j", "--json", "json", "json file from seqcluster"),
        ("-n", "--names", "names", "comma-separeted id clusters"),
        ("-r", "--reference", "ref", "reference fasta file with index"),
        ("-o", "--out", "out", "dir of output files"),
    )
    for short_flag, long_flag, target, text in mandatory:
        explore.add_argument(short_flag, long_flag, dest=target,
                             required=True, help=text)
    return _add_debug_option(explore)


def add_subparser_prepare(subparsers):
    """Register the ``prepare`` sub-command on ``subparsers``.

    Options: mandatory ``-c/--conf`` (stored as ``config``) and
    ``-o/--out``; optional integer thresholds ``--minl`` (18), ``--maxl``
    (35), ``--minc`` (10) and ``--min-shared`` (2); plus the shared debug
    flags.

    :param subparsers: object returned by ``ArgumentParser.add_subparsers``.
    :returns: the sub-parser created for ``prepare``.
    """
    parser = subparsers.add_parser("prepare", help="prepare data")
    parser.add_argument("-c", "--conf", dest="config", required=True,
            help="file with config file:1st column:path_to_fasta_file ; 2nd column:name")
    parser.add_argument("-o", "--out", dest="out", required=True,
            help="output dir")
    # type=int keeps user-supplied values the same type as the integer
    # defaults; without it a value given on the command line is a str.
    parser.add_argument("-l", "--minl", dest="minl", required=False,
            type=int, help="minimum length", default=18)
    parser.add_argument("-u", "--maxl", dest="maxl", required=False,
            type=int, help="maximum length", default=35)
    parser.add_argument("-e", "--minc", dest="minc", required=False,
            type=int, help="minimum counts", default=10)
    parser.add_argument("--min-shared", dest="min_shared", required=False,
            type=int, help="minimum shamples with same sequences", default=2)
    parser = _add_debug_option(parser)
    return parser


def add_subparser_collapse(subparsers):
    """Register the ``collapse`` sub-command on ``subparsers``.

    Options: mandatory ``-f/--fastq`` and ``-o/--out``; optional integer
    ``-m/--min`` (stored as ``minimum``, default 1) and ``--min_size``
    (default 16); plus the shared debug flags.

    :param subparsers: object returned by ``ArgumentParser.add_subparsers``.
    :returns: the sub-parser created for ``collapse``.
    """
    parser = subparsers.add_parser("collapse", help="collapse data")
    parser.add_argument("-f", "--fastq", dest="fastq", required=True,
                        help="fastq file")
    # Trailing spaces inside the literals keep the implicitly concatenated
    # sentences from running together in the help output.
    parser.add_argument("-m", "--min", dest="minimum", default=1,
                        type=int,
                        help="Minimum number of counts required. "
                             "Not recomended > 1. Could bias downstream "
                             "Analysis.")
    # type=int so a user-supplied value matches the integer default.
    parser.add_argument("--min_size", default=16, type=int,
                        help="Minimum size to be included.")
    parser.add_argument("-o", "--out",
                        dest="out", help="output file", required=True)
    parser = _add_debug_option(parser)
    return parser


def add_subparser_predict(subparsers):
    """Register the ``predict`` sub-command on ``subparsers``.

    Mandatory: ``-j/--json``, ``-o/--out`` and ``--reference``. Optional:
    ``--bed``, ``--bam`` and the ``--coral`` switch, followed by the
    shared debug flags. Returns the new sub-parser.
    """
    predict = subparsers.add_parser("predict", help="predict smallRNA types")
    predict.add_argument("-j", "--json", required=True, dest="json",
                         help="json file from seqcluster")
    for flag, text in (("--bed", "BED ouput from cluster to clean BAM file"),
                       ("--bam", "BAM file used in cluster subcmd.")):
        predict.add_argument(flag, help=text)
    predict.add_argument("-o", "--out", required=True, dest="out",
                         help="dir of output files")
    predict.add_argument("--reference", required=True,
                         help="reference fasta file with index")
    predict.add_argument("--coral", help="Run CoRaL pipeline",
                         action="store_true")
    return _add_debug_option(predict)


def add_subparser_target(subparsers):
    """Register the ``target`` sub-command on ``subparsers``.

    Every option (``--input``, ``--sps``, ``-o/--out``, ``--annotation``)
    is mandatory; the shared debug flags are appended last. Returns the
    new sub-parser.
    """
    target = subparsers.add_parser("target", help="Annotate miRNA targets.")
    target.add_argument("--input", help="list of miRNAs in 1 column format",
                        required=True)
    target.add_argument("--sps", help="species", required=True)
    target.add_argument("-o", "--out", help="dir of output files",
                        required=True, dest="out")
    annotation_help = ("Folder with tarets annotation. "
                       "If bcbio installed would be the srnaseq ffolder")
    target.add_argument("--annotation", help=annotation_help, required=True)
    return _add_debug_option(target)


def add_subparser_cluster(subparsers):
    """Register the ``cluster`` sub-command on ``subparsers``.

    Mandatory options are ``-a/--afile``, ``-m/--ma`` (stored as
    ``ffile``) and ``-o/--out``. The remaining options are optional:
    annotation inputs, reference, masking, clustering method and
    thresholds, plus the shared debug flags.

    :param subparsers: object returned by ``ArgumentParser.add_subparsers``.
    :returns: the sub-parser created for ``cluster``.
    """
    parser = subparsers.add_parser("cluster", help="cluster data")
    parser.add_argument("-a", "--afile", dest="afile", required=True,
                        help="aligned file in bam format")
    parser.add_argument("-m", "--ma", dest="ffile", required=True,
                        help="matrix file with sequences and counts for each sample")
    parser.add_argument("-g", "--gtf", dest="gtf",
                        help="annotate with gtf_file. It will use the 3rd column as the tag to annotate"
                             "\nchr1    source  intergenic      1       11503   .       +       .       ")
    parser.add_argument("-b", "--bed", dest="bed",
                        help="annotate with bed_file. It will use the 4rd column as the tag to annotate"
                             "\nchr1    157783  157886  snRNA   0       -")
    parser.add_argument("-o", "--out",
                        dest="out", help="output dir", required=True)
    parser.add_argument("-ref",
                        dest="ref", help="reference fasta")
    parser.add_argument("--mask",
                        help="bed file with regions to mask")
    # Debug flags are added here, mid-list, as in the original ordering,
    # so the generated help output keeps the same option order.
    parser = _add_debug_option(parser)
    parser.add_argument("-s", "--show", action="store_true",
                        dest="show", help="no show sequences", default=False)
    parser.add_argument("--non-un-gl", action="store_true",
                        help="remove Un_gl chromosomes", default=False)
    parser.add_argument("--method", choices=["most-voted", "split", "bayes"],
                        dest="method", help="most-voted, split, bayes",
                        default='most-voted')
    # type=float / type=int so user-supplied thresholds have the same type
    # as the numeric defaults; without it they are passed on as str.
    parser.add_argument("--similar", dest="similar", type=float,
                        help="threshold to consider two clusters identicals",
                        default=0.8)
    parser.add_argument("--min_seqs", dest="min_seqs", type=int,
                        help="threshold to consider a cluster as valid",
                        default=10)
    parser.add_argument("--db",
                        help="prefix for sqlite3 database with results to use htmlViz plugin (in dev).")
    parser.add_argument("--feature_id",
                        help="name in GTF to use to annotate clusters", default='name')
    return parser


def add_subparser_stats(subparsers):
    """Register the ``stats`` sub-command on ``subparsers``.

    ``-j/--json``, ``-m/--ma`` and ``-a/--sam`` are optional inputs;
    ``-o/--out`` is mandatory. The shared debug flags are appended last.
    Returns the new sub-parser.
    """
    stats = subparsers.add_parser("stats", help="stats data")
    optional_inputs = (
        ("-j", "--json", "json", "json file from seqcluster"),
        ("-m", "--ma", "ma", "seqs.ma from prepare"),
        ("-a", "--sam", "sam", "aligned file"),
    )
    for short_flag, long_flag, target, text in optional_inputs:
        stats.add_argument(short_flag, long_flag, dest=target,
                           required=False, help=text)
    stats.add_argument("-o", "--out", required=True, dest="out",
                       help="output dir")
    return _add_debug_option(stats)


def add_subparser_simulator(subparsers):
    """Register the ``simulator`` sub-command on ``subparsers``.

    ``--out`` is the only mandatory option; ``--bed``, ``--fasta`` and
    ``-r/--reference`` (stored as ``ref``) are optional. The shared debug
    flags are appended last. Returns the new sub-parser.
    """
    simulator = subparsers.add_parser(
        "simulator", help="simulate small RNA  from bed file")
    for flag, text in (
            ("--bed", "bed file with position of precursors <=200 nt"),
            ("--fasta", "fasta with precursors.")):
        simulator.add_argument(flag, help=text)
    simulator.add_argument("--out", help="dir of output files",
                           required=True, dest="out")
    simulator.add_argument("-r", "--reference", dest="ref",
                           help="reference fasta file with index")
    return _add_debug_option(simulator)