File: repfvsrepf.sh

package info (click to toggle)
genometools 1.6.1%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 50,412 kB
  • sloc: ansic: 271,241; ruby: 30,339; python: 4,880; sh: 3,193; makefile: 1,194; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (59 lines) | stat: -rwxr-xr-x 1,129 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/sh

#set -e -x

# Print a one-line synopsis of the expected command-line arguments
# (minimum length and input file) to stdout.
usage()
{
  printf 'Usage: %s minlength inputfile\n' "$0"
}

# Run a command line and abort the whole script if it fails.
#
# Arguments: $1 - the complete command line as ONE string. It is expanded
#                 unquoted on purpose so that the shell splits it into the
#                 command name and its arguments; as a consequence,
#                 arguments containing whitespace or glob characters cannot
#                 be passed through this helper.
# Outputs:   the command's own stdout/stderr pass through unchanged;
#            on failure "failure: <command line>" is written to stderr.
# Exits:     with status 1 when the command returns a non-zero status.
checkerror()
{
  if ! $1
  then
    # Report on stderr: callers redirect this function's stdout into result
    # files, so a message on stdout would end up inside the data instead of
    # being shown to the user.
    echo "failure: ${1}" >&2
    exit 1
  fi
}

# Remove comment lines from a file and squeeze its spacing, in place.
#
# Arguments: $1 - path of the file to rewrite
# Globals:   TMPFILE (written) - scratch file created in the current
#            directory via mktemp
# Effects:   every line starting with '#' is deleted and every run of
#            spaces is replaced by a single space; the result replaces $1.
# Exits:     with status 1 if the scratch file cannot be created, or if
#            filtering or replacing fails (the scratch file is removed and
#            $1 is not replaced in that case).
cleanhashlines()
{
  TMPFILE=$(mktemp TMP.XXXXXX) || exit 1
  # Only move the filtered copy over the original when sed succeeded;
  # otherwise the original would be replaced by an empty/partial file.
  if sed -e '/^#/d' -e 's/[ ][ ]*/ /g' -- "$1" > "${TMPFILE}" &&
     mv -- "${TMPFILE}" "$1"
  then
    return 0
  fi
  rm -f -- "${TMPFILE}"
  echo "cleanhashlines: cannot rewrite $1" >&2
  exit 1
}

# Sort the lines of a file numerically (sort -n), in place.
#
# Arguments: $1 - path of the file to sort
# Globals:   TMPFILE (written) - scratch file created in the current
#            directory via mktemp
# Exits:     with status 1 if the scratch file cannot be created, or if
#            sorting or replacing fails (the scratch file is removed and
#            $1 is not replaced in that case).
sortlines()
{
  TMPFILE=$(mktemp TMP.XXXXXX) || exit 1
  # Replace the original only after sort has succeeded, so a failed sort
  # cannot silently leave an empty file behind.
  if sort -n -- "$1" > "${TMPFILE}" &&
     mv -- "${TMPFILE}" "$1"
  then
    return 0
  fi
  rm -f -- "${TMPFILE}"
  echo "sortlines: cannot sort $1" >&2
  exit 1
}

# Reduce every line of a file to its fields 1, 3, 4, 5 and 7, in place.
#
# Arguments: $1 - path of the file to rewrite
# Globals:   TMPFILE (written) - scratch file created in the current
#            directory via mktemp
#            AWKCMD (written)  - awk implementation chosen for the run
# Effects:   each line is replaced by the selected whitespace-separated
#            fields joined by single spaces; the result replaces $1.
# Exits:     with status 1 if the scratch file cannot be created, or if
#            awk or the final move fails (the scratch file is removed and
#            $1 is not replaced in that case).
extractlines()
{
  # Prefer the originally hard-coded gawk location when it exists, but fall
  # back to whatever gawk/awk is on PATH so the script also runs elsewhere.
  # The awk program below only uses standard awk features.
  if test -x /sw/bin/gawk
  then
    AWKCMD=/sw/bin/gawk
  elif command -v gawk > /dev/null 2>&1
  then
    AWKCMD=gawk
  else
    AWKCMD=awk
  fi

  TMPFILE=$(mktemp TMP.XXXXXX) || exit 1
  if "${AWKCMD}" '{print $1 " " $3 " " $4 " " $5 " " $7}' "$1" > "${TMPFILE}" &&
     mv -- "${TMPFILE}" "$1"
  then
    return 0
  fi
  rm -f -- "${TMPFILE}"
  echo "extractlines: cannot rewrite $1" >&2
  exit 1
}

# Exactly two arguments are required: the minimum length passed to both
# tools via -l, and the input sequence file.
if test $# -ne 2
then
  # Wrong invocation is an error, so the synopsis goes to stderr.
  usage >&2
  exit 1
fi

minlength=$1
filename=$2

# Tool locations. The defaults are the paths this script was written with;
# set GTDIR and/or REPFIND in the environment to run it on another machine.
GTDIR=${GTDIR:-/Users/stefan/genometools}
REPFIND=${REPFIND:-/Users/stefan/bin-ops/i686-apple-darwin/repfind.x}

# Build the index "sfxidx" for the input file, then run gt repfind on it.
# Note: checkerror word-splits its argument, so ${filename} must not
# contain whitespace.
checkerror "${GTDIR}/bin/gt suffixerator -db ${filename} -indexname sfxidx -dna -suf -tis -lcp -pl"
checkerror "${GTDIR}/bin/gt repfind -l ${minlength} -r -ii sfxidx" > result.gt

# Normalise the gt output: drop '#' lines, squeeze spaces, keep only
# fields 1,3,4,5,7 and sort numerically.
# NOTE(review): the field selection presumably aligns gt's columns with the
# repfind.x output produced with -noevalue -nodistance; confirm.
cleanhashlines result.gt
extractlines result.gt
sortlines result.gt

# Run the other repfind implementation on the same input and normalise its
# output the same way (no field extraction is applied here).
checkerror "${REPFIND} -allmax -l ${minlength} -r -noevalue -nodistance $filename" > result.rep
cleanhashlines result.rep
sortlines result.rep

# Any difference (ignoring whitespace) makes diff return non-zero, which
# checkerror turns into exit status 1.
checkerror "diff -w result.rep result.gt"