File: makeblastdb.sh

package info (click to toggle)
biomaj3 3.1.24-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 784 kB
  • sloc: python: 4,495; sh: 359; makefile: 154
file content (212 lines) | stat: -rwxr-xr-x 4,921 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/bin/bash

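# Script for Biomaj PostProcess
# author : ofilangi, osallou
# date   : 19/06/2007
# update : 22/10/2010 fix bug in generated alias file + a few cleanups
#          23/12/2015 use makeblastdb for ncbi blast+
#
# Arguments:
#  1: input files
#  2: working directory
#  3: formatdb options (without -in for input file)
#  4: bank name
#
# Variables read but never set by this script: datadir, dirversion,
# PP_DEPENDENCE.
#
# Options noted by the original authors:
#  -title  Title for database file [String]  Optional
#  -in  Input file(s) for formatting [File In]  Optional
#  -logfile  Logfile name: [File Out]  Optional
#    default = formatdb.log
#  -dbtype nucl
#  -parse_seqids
#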

#----------
#GLOBAL DEF
#----------
# Directory where the alias files are generated. $datadir is not set by this
# script; it is expected to be provided by the caller.
BLASTDB_DIR="$datadir/index-blast"
# Quoted so that a data directory containing blanks is created as one path.
mkdir -p -- "$BLASTDB_DIR"
# Executable used to build the BLAST database (looked up in PATH).
FORMATDB="makeblastdb"


#----------
# FUNCTIONS 
#----------
# createAlias: builds an alias file
# arg1: file to write to (replaced if it already exists)
# arg2: bank name, written on the TITLE line
# arg3: db file list (whitespace-separated), written on the DBLIST line
createAlias() {
  local file=$1
  local nomBanque=$2
  local lFiles=$3
  local IFS=$' \t\n'
  local -a dbs=()

  # Split the list on whitespace WITHOUT pathname expansion: entries are
  # written verbatim, separated by single blanks, and the trailing blank that
  # callers leave at the end of the list is dropped.
  # read reports "end of input" because no NUL delimiter is found: expected.
  read -r -d '' -a dbs <<< "$lFiles" || true

  rm -f -- "$file"
  # One redirection for the whole file; the path is quoted so that a name
  # containing blanks is a valid redirection target.
  {
    echo "#"
    echo "# Alias file created $(date)"
    echo "#"
    echo "#"
    printf '%s\n' "TITLE $nomBanque"
    echo "#"
    printf '%s\n' "DBLIST ${dbs[*]}"
    echo "#"
    echo "#GILIST"
    echo "#"
    echo "#OIDLIST"
    echo "#"
  } > "$file"
}

#-----
# MAIN
#-----

# Exactly four arguments are required. Otherwise the usage is printed, as a
# whole, on stderr, followed by the help of the database builder.
if [[ $# -ne 4 ]]; then
  {
    echo "arguments:"
    echo "1: input files"
    echo "2: working directory"
    echo "3: formatdb options (without -in for input file)"
    echo "4: bank name"
    # Ask the tool that is really run for its help, not the legacy formatdb.
    "${FORMATDB:-makeblastdb}" -help
  } 1>&2
  # Status bash reported for the former "exit -1", spelled as a valid value.
  exit 255
fi

# Directory holding the release being built. If it is missing (for instance
# because datadir or dirversion is not set) nothing below can succeed, and
# the cleanup must not run against an unintended path.
releaseDir="$datadir/$dirversion/future_release"
if [[ ! -d "$releaseDir" ]]; then
  echo "Release directory $releaseDir not found (check datadir and dirversion)." 1>&2
  exit 1
fi

relWorkDir=$(printf '%s\n' "$2" | sed 's/\/*$//') # remove useless trailing slash

# The working dir is wiped below: an empty name would designate the release
# directory itself, i.e. the input files.
if [[ -z "$relWorkDir" ]]; then
  echo "Working directory must not be empty." 1>&2
  exit 1
fi

workdir="$releaseDir/$relWorkDir"

# Start from an empty working dir
rm -rf -- "$workdir"
if ! mkdir -p -- "$workdir"; then
  echo "Cannot create $workdir." 1>&2
  exit 1
fi

# Everything that follows creates and deletes files relative to the current
# directory, so a failed cd must stop the script.
if ! cd -- "$workdir"; then
  echo "Cannot change to $workdir." 1>&2
  exit 1
fi

# Relative prefix used for the links: one "../" per component of the working
# dir name, i.e. the way back up from the working dir.
dir=$relWorkDir
back=""
OLDIFS=$IFS
IFS="/" # split the name on slashes instead of blanks
for i in $dir; do
  back="../$back"
done
IFS=$OLDIFS # restore the splitting rules for the rest of the script

# Create links to input files into the working dir
listFile=""
# Directory the input expressions are relative to
srcRoot="$datadir/$dirversion/future_release"

# $1 is a whitespace-separated list of expressions: splitting is intended.
for expression in $1; do
  # The expression may contain shell wildcards; ls expands it and fails when
  # nothing matches. Only the expression part is left unquoted.
  if ! lsFile=$(ls -- "$srcRoot"/$expression); then
    echo "No input file found in dir $srcRoot for [$expression]." 1>&2
    exit 1
  fi
  for f in $lsFile; do
    name=${f##*/}
    rm -f -- "$4".p* "$4".n*
    # Link name: file name up to its first dot
    nameLink=${name%%.*}
    # The target is derived from the matched file itself, not from the
    # directory part of the expression, which may still contain wildcards.
    target="$back${f#"$srcRoot"/}"
    if ! ln -s -- "$target" "$nameLink"; then
      echo "Cannot create link. [ln -s $target $nameLink]" 1>&2
      exit 1
    fi
    # New names are prepended to the space-separated list
    if [[ -z "$listFile" ]]; then
      listFile=$nameLink
    else
      listFile="$nameLink $listFile"
    fi
  done
done

echo "Input sequence file list: $listFile"

if [[ -z "$listFile" ]]; then
  echo "No input file found." 1>&2
  exit 1
fi

nameB=$4
echo "Database name: $nameB"

echo "Working in "$(pwd)
echo "Launching formatdb [formatdb -in $listFile $3 -out $nameB]"

# Run the database builder: the whole input list is passed as a single -in
# value, while the options of argument 3 are split into separate words.
$FORMATDB -in "$listFile" $3 -out $nameB
formatdbResult=$?

if [ "$formatdbResult" -ne 0 ]; then
  echo "Formatdb failed with status $formatdbResult" 1>&2
  exit 1
fi

echo "##BIOMAJ#blast###$2$nameB"

# Delete temp files and links
#-------------------------------------------------------------
# $listFile is a space-separated list of link names: splitting is intended.
rm -f -- $listFile
rm -f formatdb.log

# Add generated files to biomaj postprocess dependance
echo "Generated files:"
# The shell expands the pattern itself, so each file name stays one word
# even when it contains blanks.
for ff in *; do
  # An unmatched pattern is left as a literal "*": skip it.
  [[ -e "$ff" || -L "$ff" ]] || continue
  printf '%s\n' "$PP_DEPENDENCE$PWD/$ff"
done

# Read the target of the future_release link; it is used as the location of
# the database files in the alias files written to $BLASTDB_DIR.
# Options are given before the operand, and the operand is quoted so that a
# path containing blanks stays a single argument.
if ! goodPath=$(readlink -s -n -- "$datadir/$dirversion/future_release"); then
  echo "Failed to get version path: readlink returned with an error [$goodPath]" 1>&2
  exit 1
fi

# Search for alias files (*.?al) which are sometimes generated by formatdb.
if lsAl=$(ls *.?al 2> /dev/null); then
  echo "Generated alias files:"
  echo "$lsAl"
else
  echo "No alias file found."
  lsAl=""
fi

# If alias files were generated, use them to generate alias files in $BLASTDB_DIR
for fileIndexVirtuel in $lsAl; do
  echo "Found alias file: [$fileIndexVirtuel]"
  # Only real DBLIST lines are wanted, not comments mentioning the keyword.
  listIndex=$(grep '^[[:space:]]*DBLIST' "$fileIndexVirtuel")
  listFile2=""
  # NOTE(review): entries are used verbatim; if the generated file quotes
  # them, the quotes end up inside the path - confirm against real output.
  for f in $listIndex; do
    if [[ "$f" != "DBLIST" ]]; then
      # Each entry is prefixed with the location of the working dir;
      # prepending reverses the order of the original list.
      listFile2="$goodPath/$relWorkDir/$f $listFile2"
    fi
  done
  echo "Creating alias in [$BLASTDB_DIR/$fileIndexVirtuel]"
  createAlias "$BLASTDB_DIR/$fileIndexVirtuel" "$nameB" "$listFile2"
done

# Else, if no alias file was generated by formatdb, create them
if [[ -z "$lsAl" ]]; then
  # Extension of the alias file, taken from the last listed header file: the
  # 5 trailing bytes of the listing are ".?hr" plus the newline, of which the
  # first two are kept (".nhr" gives ".nal", ".phr" gives ".pal").
  # The regex is quoted so the shell cannot expand it as a file pattern.
  ext=$(ls | grep '.*hr$' | tail -c5 | head -c2)al
  echo "Creating alias file [$PWD/$4$ext]"

  # Database names: header files without their extension
  listNhr=$(ls *.*hr | sed 's/\..hr$//g')
  listFileNalRel="" # List of blast db files, relative path
  listFileNalAbs="" # List of blast db files, prefixed with $goodPath/$relWorkDir
  for f in $listNhr; do
    listFileNalRel="$f $listFileNalRel"
    listFileNalAbs="$goodPath/$relWorkDir/$f $listFileNalAbs"
  done

  # Arguments are quoted so that an empty one keeps its position.
  createAlias "$4$ext" "$nameB" "$listFileNalRel"
  printf '%s\n' "$PP_DEPENDENCE$PWD/$4$ext"

  echo "Creating alias in [$BLASTDB_DIR/$4$ext]"
  createAlias "$BLASTDB_DIR/$4$ext" "$nameB" "$listFileNalAbs"
fi