File: gth.1.adoc

package info (click to toggle)
genomethreader 1.7.3%2Bdfsg-10
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 46,568 kB
  • sloc: ansic: 90,168; ruby: 1,769; makefile: 573; sh: 112; perl: 105
file content (415 lines) | stat: -rw-r--r-- 11,102 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
# gth(1)

## NAME

gth - predict gene structures

## SYNOPSIS

*gth* [option ...] -genomic file [...] -cdna file [...] -protein file [...]

## DESCRIPTION

Computes similarity-based gene structure predictions (spliced alignments)
using cDNA/EST and/or protein sequences and assembles the resulting spliced
alignments into consensus spliced alignments.

## OPTIONS

*-genomic* <file>::
  specify input files containing genomic sequences (mandatory option)

*-cdna* <file>::
  specify input files containing cDNA/EST sequences

*-protein* <file>::
  specify input files containing protein sequences

*-species* <species>::
  specify species to select splice site model which is most appropriate; possible species:
                  "human"
                  "mouse"
                  "rat"
                  "chicken"
                  "drosophila"
                  "nematode"
                  "fission_yeast"
                  "aspergillus"
                  "arabidopsis"
                  "maize"
                  "rice"
                  "medicago"
                  default: undefined

*-bssm*::
  read bssm parameter from file in the path given by the environment variable BSSMDIR, default: undefined

*-scorematrix*::
  read amino acid substitution scoring matrix from file in the
                  path given by the environment variable GTHDATADIR
                  default: BLOSUM62

*-translationtable*::
  set the codon translation table used for codon translation in
                   matching, DP, and output
                   default: 1

*-f*::
  analyze only forward strand of genomic sequences
                   default: no

*-r*::
  analyze only reverse strand of genomic sequences
                   default: no

*-cdnaforward*::
  align only forward strand of cDNAs
                   default: no

*-frompos*::
  analyze genomic sequence from this position
                   requires -topos or -width; counting from 1 on
                   default: 0

*-topos*::
  analyze genomic sequence to this position
                   requires -frompos; counting from 1 on
                   default: 0

*-width*::
  analyze only this width of genomic sequence
                   requires -frompos
                   default: 0

*-v*::
  be verbose
                   default: no

*-xmlout*::
  show output in XML format
                   default: no

*-gff3out*::
  show output in GFF3 format
                   default: no

*-md5ids*::
  show MD5 fingerprints as sequence IDs
                   default: no

*-o*::
  redirect output to specified file
                   default: undefined

*-gzip*::
  write gzip compressed output file
                   default: no

*-bzip2*::
  write bzip2 compressed output file
                   default: no

*-force*::
  force writing to output file
                   default: no

*-skipalignmentout*::
  skip output of spliced alignments
                   default: no

*-mincutoffs*::
  show full spliced alignments
                   i.e., cutoffs mode for leading and terminal bases is MINIMAL
                   default: no

*-showintronmaxlen*::
  set the maximum length of a fully shown intron
                   If set to 0, all introns are shown completely
                   default: 120

*-minorflen*::
  set the minimum length of an ORF to be shown
                   default: 64

*-startcodon*::
  require that an ORF must begin with a start codon
                   default: no

*-finalstopcodon*::
  require that the final ORF must end with a stop codon
                   default: no

*-showseqnums*::
  show sequence numbers in output
                   default: no

*-pglgentemplate*::
  show genomic template in PGL lines 
                   (switch off for backward compatibility)
                   default: yes

*-gs2out*::
  output in old GeneSeqer2 format
                   default: no

*-maskpolyatails*::
  mask poly(A) tails in cDNA/EST files
                   default: no

*-proteinsmap*::
  specify smap file used for protein files
                   default: protein

*-noautoindex*::
  do not create indices automatically
                   except for the .dna.* files used for the DP.
                   Existence is not tested before an index is actually used!
                   default: no

*-createindicesonly*::
  stop program flow after the indices have been created
                   default: no

*-skipindexcheck*::
  skip index check (in preprocessing phase)
                   default: no

*-minmatchlen*::
  specify minimum match length (cDNA matching)
                   default: 20

*-seedlength*::
  specify the seed length (cDNA matching)
                   default: 18

*-exdrop*::
  specify the Xdrop value for edit distance extension (cDNA
                   matching)
                   default: 2

*-prminmatchlen*::
  specify minimum match length (protein matching)
                   default: 24

*-prseedlength*::
  specify seed length (protein matching)
                   default: 10

*-prhdist*::
  specify Hamming distance (protein matching)
                   default: 4

*-online*::
  run the similarity filter online without using the complete
                   index (increases runtime)
                   default: no

*-inverse*::
  invert query and index in vmatch call
                   default: no

*-exact*::
  use exact matches in the similarity filter
                   default: no

*-gcmaxgapwidth*::
  set the maximum gap width for global chains
                   defines approximately the maximum intron length
                   set to 0 to allow for unlimited length
                   in order to avoid false-positive exons (lonely exons) at the
                   sequence ends, it is very important to set this parameter
                   appropriately!
                   default: 1000000

*-gcmincoverage*::
  set the minimum coverage of global chains with regard to the
                   reference sequence
                   default: 50

*-paralogs*::
  compute paralogous genes (different chaining procedure)
                   default: no

*-enrichchains*::
  enrich genomic sequence part of global chains with additional
                   matches
                   default: no

*-introncutout*::
  enable the intron cutout technique
                   default: no

*-fastdp*::
  use jump table to increase speed of DP calculation
                   default: no

*-autointroncutout*::
  set the automatic intron cutout matrix size in megabytes and
                   enable the automatic intron cutout technique
                   default: 0

*-icinitialdelta*::
  set the initial delta used for intron cutouts
                   default: 50

*-iciterations*::
  set the number of intron cutout iterations
                   default: 2

*-icdeltaincrease*::
  set the delta increase during every iteration
                   default: 50

*-icminremintronlen*::
  set the minimum remaining intron length for an intron to be
                   cut out
                   default: 10

*-nou12intronmodel*::
  disable the U12-type intron model
                   default: no

*-u12donorprob*::
  set the probability for perfect U12-type donor sites
                   default: 0.99

*-u12donorprob1mism*::
  set the probability for U12-type donor sites with 1 mismatch
                   default: 0.90

*-probies*::
  set the initial exon state probability
                   default: 0.50

*-probdelgen*::
  set the genomic sequence deletion probability
                   default: 0.03

*-identityweight*::
  set the weight for pairs of identical characters
                   default: 2.00

*-mismatchweight*::
  set the weight for mismatching characters
                   default: -2.00

*-undetcharweight*::
  set the weight for undetermined characters
                   default: 0.00

*-deletionweight*::
  set the weight for deletions
                   default: -5.00

*-dpminexonlen*::
  set the minimum exon length for the DP
                   default: 5

*-dpminintronlen*::
  set the minimum intron length for the DP
                   default: 50

*-shortexonpenal*::
  set the short exon penalty
                   default: 100.00

*-shortintronpenal*::
  set the short intron penalty
                   default: 100.00

*-wzerotransition*::
  set the zero transition weights window size
                   default: 80

*-wdecreasedoutput*::
  set the decreased output weights window size
                   default: 80

*-leadcutoffsmode*::
  set the cutoffs mode for leading bases
                   can be either RELAXED, STRICT, or MINIMAL
                   default: RELAXED

*-termcutoffsmode*::
  set the cutoffs mode for terminal bases
                   can be either RELAXED, STRICT, or MINIMAL
                   default: STRICT

*-cutoffsminexonlen*::
  set the cutoffs minimum exon length
                   default: 5

*-scoreminexonlen*::
  set the score minimum exon length
                   default: 50

*-minaveragessp*::
  set the minimum average splice site probability
                   default: 0.50

*-duplicatecheck*::
  criterion used to check for spliced alignment duplicates,
                   choose from none|id|desc|seq|both
                   default: both

*-minalignmentscore*::
  set the minimum alignment score for spliced alignments to be
                   included into the set of spliced alignments
                   default: 0.00

*-maxalignmentscore*::
  set the maximum alignment score for spliced alignments to be
                   included into the set of spliced alignments
                   default: 1.00

*-mincoverage*::
  set the minimum coverage for spliced alignments to be
                   included into the set of spliced alignments
                   default: 0.00

*-maxcoverage*::
  set the maximum coverage for spliced alignments to be
                   included into the set of spliced alignments
                   default: 9999.99

*-intermediate*::
  stop after calculation of spliced alignments and output
                   results in reusable XML format. Do not process this output
                   yourself, use the ``normal'' XML output instead!
                   default: no

*-sortags*::
  sort alternative gene structures according to the weighted
                   mean of the average exon score and the average splice site
                   probability
                   default: no

*-sortagswf*::
  set the weight factor for the sorting of AGSs
                   default: 1.00

*-exondistri*::
  show the exon length distribution
                   default: no

*-introndistri*::
  show the intron length distribution
                   default: no

*-refseqcovdistri*::
  show the reference sequence coverage distribution
                   default: no

*-first*::
  set the maximum number of spliced alignments per genomic DNA
                   input. Set to 0 for unlimited number.
                   default: 0

*-help*::
  display help for basic options and exit

*-help+*::
  display help for all options and exit

*-version*::
  display version information and exit