File: run-se-da.sh

package info (click to toggle)
genometools 1.6.6%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 50,576 kB
  • sloc: ansic: 271,876; ruby: 29,930; python: 5,106; sh: 3,083; makefile: 1,213; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (78 lines) | stat: -rwxr-xr-x 2,284 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/sh

# author: Stefan Kurtz, Nov. 2015

set -e -x

if test $# -eq 3
then
  minlen=$1
  minidentity=$2
  referencefile=$3
  queryfile=""
else
  if test $# -eq 4
  then
    minlen=$1
    minidentity=$2
    referencefile=$3
    queryfile=$4
  else
    echo "Usage: $0 <minlen> <minidentity> <referencefile> [queryfile]"
    exit 1
  fi
fi

reference=`basename $referencefile`
seedlength=14
maxfreq=21

# note that the following script randomly replaces wildcards by
# characters over the base alphabet. So for the same sequence and parameters
# the set of matches often varies over different runs.
scripts/convert2myersformat.rb $referencefile > ${reference}.fasta

if test $PACKAGES != ""
then
  MYERSPROG="$PACKAGES"/myers
else
  MYERSPROG="../myers"
fi

rm -f ${reference}.db
${MYERSPROG}/DAZZ_DB/fasta2DB ${reference}.db ${reference}.fasta
if test "${queryfile}" != ""
then
  query=`basename $queryfile`
  scripts/convert2myersformat.rb ${queryfile} > ${query}.fasta
  ${MYERSPROG}/DAZZ_DB/fasta2DB ${query}.db ${query}.fasta
  outputprefix="${reference}-${query}"
else
  query=${reference}
  outputprefix="${reference}"
fi

${MYERSPROG}/DALIGNER/daligner -t${maxfreq} -I -A -Y -e0.${minidentity} \
                   -k${seedlength} -l${minlen} \
                   ${query}.db ${reference}.db > ${outputprefix}-da.matches
rm -f ${reference}.db ${query}.db .${reference}.idx .${reference}.bps
rm -f ${query}.${reference}*.las .${query}.idx .${query}.bps

bin/gt encseq encode -sds no -md5 no -des no -indexname ${reference} \
                                           ${reference}.fasta
if test ${queryfile} != ""
then
  bin/gt encseq encode -sds no -md5 no -des no -indexname ${query} \
                                           ${query}.fasta
  qiioption="-qii ${query}"
else
  qiioption=""
fi

bin/gt seed_extend -ii ${reference} ${qiioption} -t ${maxfreq} -l ${minlen} \
                    -seedlength 14 -minidentity ${minidentity} -outfmt seed \
                    -v -overlappingseeds -bias-parameters -no-reverse \
                    -history 60 > ${outputprefix}-se.matches
rm -f ${query}.ssp ${query}.fasta ${query}.esq
rm -f ${reference}.ssp ${reference}.fasta ${reference}.esq
scripts/matched-seqpairs.rb ${outputprefix}-da.matches ${outputprefix}-se.matches