File: fastDNAml_boot

package info (click to toggle)
fastdnaml 1.2.1-1
  • links: PTS
  • area: main
  • in suites: potato
  • size: 516 kB
  • ctags: 379
  • sloc: ansic: 4,171; sh: 570; makefile: 64
file content (189 lines) | stat: -rw-r--r-- 6,088 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#! /bin/sh
#

#  Search the package's private bin directory ahead of the caller's PATH.
PATH="/usr/lib/fastdnaml/bin:$PATH"

#  Name this script was invoked under, with any leading directories removed;
#  used as the prefix of messages and in the usage text.
comm=$(echo "$0" | sed -e 's&^.*/&&g')

#  Long spellings of the command-line flags.
bootflag='-boots'
seedflag='-seed'
maxflag='-max'
niceflag='-nice'
cleanflag='-noclean'
outflag='-out'

#  Pipeline fragment that always heads the option filters.
stdopt='| jumble'

#  Default settings; each can be overridden by the matching flag.
remaining=1                     # bootstrap replicates still to run
jumbles=10                      # maximum jumbles per replicate
nice=10                         # nice value for fastDNAml
cleanUp=1                       # non-zero: tidy up per-jumble files
saveOut=0                       # non-zero: keep the text output
seed="${$}$(date +%M%S)"        # process ID followed by minutes and seconds

#  Parse the leading option flags.  Each flag has a long spelling (held in
#  the corresponding *flag variable) and a one-letter abbreviation:
#
#      $maxflag   | -m   n    sets jumbles
#      $cleanflag | -c        sets cleanUp=0
#      $niceflag  | -n   n    sets nice
#      $outflag   | -o        sets saveOut=1
#      $seedflag  | -s   n    sets seed
#      $bootflag  | -b   n    sets remaining
#      -                      ends flag parsing
#
#  A leading "-" is detected with a case pattern rather than by piping the
#  word through echo, because /bin/sh echo may consume an argument that
#  begins with -n.  Every expansion is quoted so that a word containing
#  blanks or glob characters is reported as a bad flag instead of upsetting
#  test.
#
#  Parsing stops, leaving the flag in place, when fewer than two words
#  remain.  An unrecognized flag is reported and every remaining argument is
#  discarded, so the argument-count check that follows prints the usage text.

while :; do
    case "$1" in
        -*) ;;
        *)  break ;;
    esac

    if test $# -lt 2; then break; fi

    case "$1" in
        "$maxflag"|-m)    jumbles=$2;   shift; shift ;;
        "$cleanflag"|-c)  cleanUp=0;    shift ;;
        "$niceflag"|-n)   nice=$2;      shift; shift ;;
        "$outflag"|-o)    saveOut=1;    shift ;;
        "$seedflag"|-s)   seed=$2;      shift; shift ;;
        "$bootflag"|-b)   remaining=$2; shift; shift ;;
        -)                shift; break ;;
        *)  echo "Bad flag:  $*"
            shift $#
            break ;;
    esac
done

#  Check the positional arguments that remain after flag parsing:
#
#      in_file  n_best                 only the standard option filter
#      in_file  n_best  "dnaml_opts"   user option filters appended to it
#
#  Any other argument count prints the usage text and exits.

case $# in
    2)  opts="$stdopt" ;;
    3)  if test -n "$3"; then opts="$stdopt | $3"
        else opts="$stdopt"
        fi ;;
    *)
        cleanprm="[$cleanflag]"
        #  The flag that keeps the fastDNAml text output is held in $outflag.
        saveprm="[$outflag]"
        cntprm="[$bootflag nboot]"
        maxprm="[$maxflag maxjumble]"
        niceprm="[$niceflag nicevalue]"
        seedprm="[$seedflag seed]"
        optprm='[ "dnaml_opt1 [ | dnaml_opt2 [...]]" ]'
        #  A here-document is used so that the trailing backslashes in the
        #  synopsis are printed the same way by every /bin/sh.
        cat <<EOF

Usage: $comm  $cntprm  $seedprm\\
               $maxprm  $niceprm  $cleanprm  $saveprm\\
               in_file  n_best  $optprm

For the current bootstrap seed, the sequence input order is jumbled (up to
maxjumble times) until the same best tree is found n_best times.  The output
files are then reduced to a summary of the scores produced by jumbling, and one
example of the best tree.  The number process is then repeated with new
bootstrap seeds until nboot samples have been analyzed.

Boot and jumble are included by the script and should not be specified by the
user or in the data file.  Additional fastDNAml program options are enclosed in
quotes, and separated by vertical bars (|).

Flags and parameters:

    in_file -- name of the input data file
    n_best -- input order is jumbled (up to maxjumble times) until same tree
              is found n_best times
    $bootflag nboot -- number of different bootstrap samples (Default=1)
    $seedflag seed -- seed for first bootstrap (Default is based on the process
                  ID and time of day)
    $maxflag maxjumble -- maximum attempts at replicating inferred tree
                      (Default=10)
    $niceflag nicevalue -- run fastDNAml with specified nice value (Default=10)
    $cleanflag -- inhibits cleanup of the files for the individual jumbles
    $outflag -- inhibits cleanup of the text output from fastDNAml

EOF
        exit ;;
esac

#  From here on the two flag variables hold what is passed on to the next
#  replicate's command line, so a flag that is not in effect is blanked.

test $cleanUp -ne 0 && cleanflag=""
test $saveOut -eq 0 && outflag=""

#  Locate the input file.  The name is tried as given, then with ".phy" and
#  then with ".phylip" appended.  "root" is the name without such a suffix.

root="$1"
if test -f "$1"; then
    in="$1"
    root=$(echo "$1" | sed -e 's/\.phylip$//' -e 's/\.phy$//')
elif test -f "$1.phy"; then
    in="$1.phy"
elif test -f "$1.phylip"; then
    in="$1.phylip"
else
    echo "$comm: Unable to find input file: $1"
    exit
fi

#  Zero-pad the seed to nine digits and build the output file stem from the
#  root (directory part removed) and the padded seed.

seed=$(echo $seed | awk '{printf("%09d",$1)}')
out=$(echo "${root}_$seed" | sed -e 's&^.*/&&')

#  Check for reuse of same random seed:  the replicate is only run (and only
#  counted) when neither "$out.tree" nor "$out.out" is already present.

if test ! -f "$out.tree" && test ! -f "$out.out"; then

#  Loop over jumble orders:
#
#  Before every pass the per-jumble output files ("$out." followed by digits)
#  are re-read.  nJumble is the number of likelihood lines found so far and
#  nBest is how many of them lie within 0.001 of the best one.  Another
#  jumble is run while fewer than n_best ($2) runs share the best score and
#  fewer than $jumbles runs have been made.
#
#  The sort key is given as "-k 1"; the obsolete "+0" spelling is taken as a
#  file name by current sort implementations, which left nBest at 0.
#  The wc results are left unquoted on purpose: some wc implementations pad
#  the count with blanks.

    while
        if test $(ls -d "$out".[0-9]* 2>/dev/null | wc -l) -gt 0; then
            nJumble=$(grep '^Ln Likelihood' "$out".[0-9]* /dev/null | wc -l)
            nBest=$(grep '^Ln Likelihood' "$out".[0-9]* /dev/null |
                  sed -e 's/^.*:Ln Likelihood =\(.*\)$/\1/g' | sort -nr -k 1 |
                  awk 'BEGIN{c=0} NR==1{b=$1-0.001} $1>=b{c++} END{print c}')
        else
            nBest=0
            nJumble=0
        fi

        test "$nBest" -lt "$2" && test $nJumble -lt "$jumbles"
    do
#       $opts holds "| filter | filter ..." and must be expanded before the
#       eval so that it becomes part of the pipeline; it is the user's own
#       command-line text and is run as shell code by design.  The other
#       variables are expanded, quoted, by the eval itself so that file names
#       containing blanks survive.  "nice -n" replaces the obsolete "nice -N".
#       NOTE(review): out.PID is presumably the helper that produces the
#       "$out.<digits>" files read above -- confirm.
        eval "bootstrap \"\$seed\" < \"\$in\"  $opts |
              nice -n \"\$nice\" out.PID fastDNAml \"\$out\"" >/dev/null || exit
    done

    if test "$cleanUp" -ne 0; then
#
#     clean_jumbles
#
#     Check for files

        if test $(ls -d "$out".[0-9]* 2>/dev/null | wc -l) -eq 0; then
            echo "$comm: No files found for $out"
            exit
        fi

#     Find file suffix with the best score:  each likelihood line is turned
#     into "score file", the highest score is taken, and everything up to the
#     last "." is stripped to leave the suffix.

        pid=$(grep '^Ln Likelihood' "$out".[0-9]* /dev/null |
            sed 's/^\(.*\):Ln Like.*=\(.*\)$/\2 \1/' |
            sort -nr -k 1 | head -n 1 | sed -e 's/^.*\.//')

        if test -z "$pid"; then
            echo "$comm: No likelihoods found for $out"
            exit
        fi

#     Move output and treefile to new names.  When there is no tree file for
#     the best run, the last line of its checkpoint file is used instead.

        treenew="$out.tree"
        treeold="treefile.$pid"
        checkpt="checkpoint.$pid"

        if   test -f "$treeold"; then  mv "$treeold" "$treenew"
        elif test -f "$checkpt"; then  tail -n 1 "$checkpt" >"$treenew"
        else echo "$comm: Cannot find tree file.  Bootstrap aborted."; exit
        fi
        rm -f  "$checkpt"

        oldname="$out.$pid"
        if test "$saveOut" -ne 0; then  mv  "$oldname"  "$out.out"
        else rm -f "$oldname"
        fi

#     Remove other output, tree and checkpoint files:

        if test $(ls -d "$out".[0-9]* 2>/dev/null | wc -l) -gt 0; then
            pids=$(grep '^Ln Likelihood' "$out".[0-9]* /dev/null |
                sed -e 's/^\(.*\):Ln Like.*$/\1/' -e 's/^.*\.//')

            for pid in $pids; do
                rm -f  "$out.$pid"  "treefile.$pid"  "checkpoint.$pid"
            done
        fi

#     End of clean_jumbles

    fi

#   One replicate has been completed.
    remaining=$(expr "$remaining" - 1)
fi

#  Check number of replicates:  while any remain, start this script again in
#  the background for the next one.  The seed is not passed on, so the new
#  invocation picks its own default.  $cleanflag and $outflag are left
#  unquoted on purpose: when blank they must vanish from the command line
#  rather than be passed as empty words.  "$0" is quoted so that a script
#  path containing blanks still works.

if test "$remaining" -gt 0; then
    "$0" $bootflag "$remaining" $maxflag "$jumbles" $cleanflag $outflag $niceflag "$nice" "$@" &
fi