File: last-test.sh

package info (click to toggle)
last-align 1651-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 14,688 kB
  • sloc: cpp: 44,419; python: 5,217; ansic: 1,938; sh: 710; makefile: 457
file content (300 lines) | stat: -rwxr-xr-x 9,136 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
#! /bin/sh

# Exercise LAST programs, and compare the output to a reference
# output.  More tests should be added!

# Run one test command and label its output.
# Arguments: the command to run, either as separate words or as a single
#            quoted string (e.g. a pipeline).
# Outputs:   a "TEST <command>" banner, the command's own output, then an
#            empty line.
try () {
    echo TEST "$@"   # banner, so each command is identifiable in the output
    eval "$@"        # eval: lets a quoted pipeline be passed as one argument
    printf '\n'      # empty line separating this test from the next
}

# Work in the directory that contains this script, because the test data
# and the ../bin, ../data and ../examples paths used below are relative.
# Quoted so that a path containing whitespace works; abort if the cd fails
# rather than running the tests in the wrong directory.
cd "$(dirname "$0")" || exit 1

# Make sure we use this version of LAST:
PATH=../bin:$PATH

# Input files that several of the tests share:
dnaSeq=galGal3-M-32.fa
protSeq=Q2LCP8.fa
fastq=SRR001981-1k.fastq
gc=../examples/vertebrateMito.gc

# Name prefix of the temporary database, which the tests repeatedly rebuild:
db=/tmp/last-test

# Option sets passed to some lastal runs (presumably former default scoring
# parameters for fasta and fastq input -- TODO confirm):
oldFasta="-r1 -q1 -a7 -b1"
oldFastq="-r6 -q18 -a21 -b9"

# Delete every file whose name starts with $db when the script exits:
trap 'rm -f "$db"*' EXIT

# Main lastdb/lastal regression test.
# Everything the commands in this group write to stdout and stderr is
# concatenated, lines containing "version" are removed, and the result is
# compared with the reference file last-test.out (any difference is shown
# as a unified diff).
# Each lastdb call writes a database named $db, which the lastal calls after
# it read, so the order of the commands matters.  Commands run through "try"
# are preceded by a "TEST ..." banner in the output; the others are not.
{
    # empty input (/dev/null) for lastdb and lastal
    lastdb -uMURPHY10 $db /dev/null  # this triggered a getopt reset bug
    lastdb $db /dev/null
    lastdb -D $db
    lastal $db /dev/null

    # spaced seeds, soft-masking, centroid alignment, matrix file
    lastdb -c -m110 -C3 -R10 $db $dnaSeq
    try lastal -fMAF -u1 -j5 -p ../data/HOXD70.mat -z3400 -e2500 $db $dnaSeq

    # multiple volumes & query batches & multiple query files
    lastdb --bits=4 -m1 -s1 -C2 -R10 $db $dnaSeq
    lastdb -D $db
    try lastal -r1 -fTAB -i1 -w0 -e40 $db $dnaSeq /dev/null

    # match-counting, with multiple query batches
    try lastal -j0 -i1 -s0 $db $dnaSeq

    # FASTQ quality scores
    try lastal $oldFastq -Q1 -e90 -a9 $db $fastq

    # gapless translated alignment & genetic code file
    lastdb -p -R10 $db $protSeq
    try lastal -F12 -pBL62 -e40 -G $gc -j1 $db $dnaSeq
    try lastal -F12 -pBL62 -e40 -G2 -j1 $db $dnaSeq

    # subset seed file, soft-masking
    lastdb -c -R10 -u ../data/YASS.seed $db $dnaSeq
    try lastal -r1 -s0 -f0 -e18 $db $dnaSeq
    try lastal -r1 --reverse $db $dnaSeq

    # asymmetric scoring matrix
    try lastal -s0 -f0 -p asymmetric.mat -e2000 $db $dnaSeq

    # FASTQ-Illumina quality scores
    lastdb -uNEAR -R10 $db $dnaSeq
    try lastal -Q3 -e110 $db illumina100.txt

    # PRB-format quality data
    try lastal -Q4 -e90 $db mouse_tss_prb.txt

    # probabilistic alignment with quality scores
    try lastal -Q1 -j6 -e90 -a9 $db $fastq

    # sparse index, generalized affine gap costs, self-alignment
    lastdb -w2 -c -R10 $db $dnaSeq
    try lastal -r3 -q3 -a21 -c2 -e60 -f0 $db $dnaSeq

    # generalized affine gaps, frameshifts, tabular output
    lastdb -p -c -R10 $db $protSeq
    try lastal -F12 -pBL62 -c2 -e40 -f0 $db $dnaSeq

    # gapless alignment, protein-protein alignment, seed freq
    try lastal -j1 -f0 -e37 -m100 $db $protSeq

    # fastq-versus-fastq, seed freq
    lastdb -Q1 -R10 $db sd-ccs-100.fq
    try lastal -Q1 -r1 -q2 -a1 -b1 -e44 -m100 -s0 $db sd-ccs-100.fq

    # incomplete sorting, lastal on one volume
    lastdb -i10 -s1 $db $dnaSeq
    try lastal $oldFastq -Q1 -e90 -a9 -f0 ${db}0 $fastq

    # multiple seeds, transition constraints
    lastdb -c -R10 -m 11101T011T11,111001010010111 $db $dnaSeq
    try lastal -r1 -s0 -f0 -e18 $db $dnaSeq

    # Iedera notation
    lastdb -c -R10 -m '#@#--##--#-#' $db $dnaSeq
    try lastal -r1 -s0 -f0 -e18 $db $dnaSeq

    # overlap alignment, tabular output ending in gaps
    lastdb -uNEAR -R10 $db $dnaSeq
    try lastal -T1 -Q1 -e60 -a9 -f0 $db $fastq

    # probabilistic overlap alignment
    try lastal -T1 -Q1 -e60 -a9 -j4 $db $fastq

    # fastq-versus-fasta gapless overlap alignment
    try lastal -T1 -Q1 -e60 -j1 -fTAB $db $fastq

    # expected counts
    try lastal $oldFasta -s0 -e18 -j7 $db $dnaSeq

    # overlap alignment, hitting edge of ref seq, fastq
    # (lastdb reads the truncated sequence lines from the pipe)
    head -n21 $dnaSeq | cut -c-35 | lastdb -uNEAR $db
    try lastal -T1 -Q1 -e60 -a9 -f0 $db $fastq

    # named multi-seed, sparse query seeding
    lastdb -c -R10 -uMAM8 $db hg19-M.fa
    try lastal -r1 -e34 -k128 -f0 $db galGal3-M-32.fa

    # named score matrix, sparse query seeding
    try lastal -pHOXD70 -e4500 -k128 -f0 $db galGal3-M-32.fa

    # MAM4, gapless alignment culling
    lastdb -uMAM4 $db hg19-M.fa
    try lastal -r1 -e34 -C2 -f0 $db galGal3-M-32.fa

    # minimum seed length
    try lastal -r1 -e34 -f0 -l30 $db galGal3-M-32.fa

    # match-counting with min & max lengths
    lastdb -m1 $db $dnaSeq
    try lastal -j0 -l4 -L11 -s0 $db $dnaSeq

    # tttttccccc.fa versus ttttt.fa (presumably tiny sequences, judging by
    # the file names); lines starting with "#" are dropped from the output
    lastdb -i10 $db tttttccccc.fa
    try lastal -r1 -e5 -f0 $db ttttt.fa | grep -v '^#'

    # tantan masking on DNA
    lastdb -c -U66 $db galGal3-M-32.fa
    try lastal -r1 -e40 $db hg19-M.fa

    # hard-masking
    try lastal -r1 -e40 -u3 -fTAB $db hg19-M.fa

    # -J1
    lastal -J1 -fTAB -p hufu.train $db hg19-M.fa
    lastal -J1 -Q1 -D1000 -p hufu.train $db $fastq

    # tantan masking on protein
    lastdb -pcR01 $db Q2LCP8.fa
    try lastal -e100 $db Q5GS15.fa

    # tantan masking for translated alignment
    try lastal -F15 -pBLOSUM62 -e100 $db galGal3-M-32.fa

    # AT-rich DNA, tantan
    lastdb -cR02 $db at-rich.fa
    try lastal -pAT77 -e100 -s0 $db at-rich.fa

    # fastq + tantan
    lastdb -uNEAR $db $dnaSeq
    try lastal -Q1 -a15 -b3 -e80 $db nano.fq

    # fasta query versus fastq reference
    lastdb -Q1 -R10 $db sd-ccs-100.fq
    lastdb -D $db
    try lastal -r1 -a1 -D1000 $db galGal3-M-32.fa

    # prb query versus fastq reference
    try lastal $oldFastq -Q4 -a1 -D100 $db mouse_tss_prb.txt

    # fastq DNA versus protein
    lastdb -pcR00 $db Q2LCP8.fa
    try lastal -Q1 -pBL62 -F12 -D1000 $db sd-ccs-100.fq

    # protein-codon alignment
    lastdb -qR01 $db Q2LCP8.fa
    try lastal -Q1 -pBL62codon.mat -F12 -t3.08611 -e36 -d29 $db sd-ccs-100.fq
    try lastal -Q1 -pbadcodon.mat -a17 -F0 -j4 -X1 -D1000 $db sd-ccs-100.fq
    try lastal -Q1 -pbadcodon.mat -a17 -b1 -F9,9,9,9 -X1 -D1e3 -j4 $db sd-ccs-100.fq
    try lastal -Q1 -pBL62 -b1 -F3,3,3,3 -X1 -j4 -e56 $db sd-ccs-100.fq
    lastdb -qcR01 -B1 $db Q2LCP8.fa
    lastdb -D $db
    try lastal -Q1 -pbadcodon.mat -a17 -b1 -F9,9,9,9 -X1 -D1e3 $db sd-ccs-100.fq

    # BlastTab format
    lastdb -pR01 $db Q2LCP8.fa
    try lastal -fBlastTab -pBL62 -b1 -F15 -D1e3 $db galGal3-M-32.fa

    # BlastTab+ format
    try lastal -fBlastTab+ -pBL62 -b1 -F15 -D1e3 $db galGal3-M-32.fa

    # DNA-versus-protein alignment without frameshifts
    try lastal -j4 -pBL62 -b1 -F0 -D1e3 $db galGal3-M-32.fa

    try lastal -M $db Q5GS15.fa  # greedy protein-protein (YAGNI)

    # strand asymmetry
    lastdb $db hg19-M.fa
    try lastal -S1 -pBISF -Q1 -e120 -f0 -j4 $db bs100.fastq

    # culling
    try lastal -r1 -D1000 -fTAB -K2 $db galGal3-M-32.fa
    try lastal -r1 -D1000 -fTAB -K0 $db galGal3-M-32.fa

    # -H
    try lastal -r1 -H1 -fTAB $db galGal3-M-32.fa

    # strand asymmetry, 2 reference strands, and --split
    lastdb -uBISF -S2 $db hg19-M.fa
    try lastal -Q1 --split $db bs100.fastq
    try lastal -Q0 --split -sFR $db bs1.fastq bs2.fastq

    # lastal -2
    # (the whole pipeline is passed to try as one quoted string; sed strips
    # "/2" from bs2.fastq and lastal reads the result from stdin via "-")
    lastdb -uNEAR $db hg19-M.fa
    try "sed s:/2:: bs2.fastq | lastal -2 -Q1 -fTAB $db bs100.fastq -"
    try lastal -Q1 --split-d=RF $db bs1.fastq bs2.fastq

    # minimizers
    lastdb -W3 -R10 $db galGal3-M-32.fa
    try lastal -r1 -W19 -fTAB $db hg19-M.fa

    # minimum-difference alignment
    try lastal -W1 -M -fTAB $db hg19-M.fa

    # asymmetric gap costs
    try lastal -r1 -fTAB -j4 -A2 -B2 $db hg19-M.fa
    try lastal $oldFastq -fTAB -j4 -Q1 -e90 -a7 -A12 -B4 $db $fastq

    # fastq-ignore
    try lastal -fTAB -j4 -Q0 -e90 -a7 -A12 -B4 -b9 -r6 -q18 $db $fastq
    try lastal -j4 -Qkeep -e90 -a7 -A12 -B4 -b9 -r6 -q18 $db $fastq

    # first alignments only
    try lastal -r1 -N2 $db hg19-M.fa

    # "U" nucleotide
    # (tr replaces T/t with U/u; lastal reads the query from stdin)
    try "tr Tt Uu < hg19-M.fa | lastal -r1 -N2 $db"

    # -z %
    try lastal -r1 -W19 -z50% -fTAB $db hg19-M.fa

    # -x g
    try lastal -r1 -W19 -x1g -fTAB $db hg19-M.fa

    # non-negative score matrix
    try lastal -L9 -m0 -j1 -q0 -d5 -n6 -fTAB $db tttttccccc.fa

    # fastq-versus-fastq gapless overlap alignment
    lastdb -Q1 -uNEAR -cR01 $db $fastq
    try lastal -Q1 -T1 -j1 -s0 $db $fastq

    # the same query and scoring options with each of -X1, -X2, -X3
    lastdb $db dfam3-LTR22B1.fa
    lastal -r2 -q2 -a10 -X1 $db dfam3-LTR22C.fa
    lastal -r2 -q2 -a10 -X2 $db dfam3-LTR22C.fa
    lastal -r2 -q2 -a10 -X3 $db dfam3-LTR22C.fa

    # gap cost > SCHAR_MAX
    lastal -r12 -q12 -a128 $db dfam3-LTR22C.fa

    # word-restricted seeds, lastdb -B
    lastdb -uRY8-8 -B1 $db hg19-M.fa
    lastdb -uRY8 -B1 $db hg19-M.fa
    lastal $oldFastq -fTAB -q8 -b4 $db galGal3-M-32.fa

    # tricky Forward-Backward bug that happened once
    lastdb $db alli.fa
    lastal -j7 -r5 -q5 -a15 -b3 $db huma.fa

    # --split-d=2 with a score matrix file, fastq query
    lastdb --bits=4 -uNEAR $db od-xsr-100k.fa
    lastal -D10 --split-d=2 -p od.mat $db od-rna.fq

    # lastdb strands & volumes
    lastdb --bits=4 -S2 -s1 -m1 $db galGal3-M-32.fa
    lastal -s0 -fTAB -p hufu.train $db hg19-M.fa
    lastal -fTAB -p hufu.train $db hg19-M.fa

    # lastdb --circular
    lastdb --circular -c $db hg19-M.fa
    lastal -r1 -fTAB $db galGal3-M-32.fa
} 2>&1 |
grep -v version | diff -u last-test.out -

# Test: last-bisulfite, last-merge-batches, last-split, named seeds
# Two databases, named f and r, are built in the current directory with the
# BISF and BISR seeds and passed to last-bisulfite.sh.  Its output, minus
# lines starting with "#", is compared with bs100.maf; the database files
# are removed afterwards.
lastdb -uBISF -R10 f hg19-M.fa
lastdb -uBISR -R10 r hg19-M.fa
../examples/last-bisulfite.sh f r bs100.fastq | grep -v '^#' | diff bs100.maf -
rm f.* r.*

# Run the separate test scripts that live in this directory:
./last-map-probs-test.sh
./last-pair-test.sh
./last-postmask-test.sh
./last-split-test.sh
./last-train-test.sh
./maf-convert-test.sh
./maf-cut-test.sh
./maf-linked-test.sh
./maf-swap-test.sh

# Test: lastdb, lastal, last-split, maf-sort, maf-join
# multiMito.sh is run from the examples directory; stop with an error if we
# cannot get there, instead of reporting a misleading diff failure.
cd ../examples || exit 1
./multiMito.sh | diff multiMito.maf -