1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313
|
#!/usr/local/bin/perl
#
# These are the things you might want to change. Scroll down for docs on the program
# or preferable go pod2text halfwise.pl
#
# so that the testing works at sanger centre. You could change this obviously
# on your site, though it may be easier to install bioperl correctly
use lib '/nfs/disk92/PerlSource/Bioperl/Releases/bioperl-0.04.2/';
BEGIN {
eval {
use Bio::Tools::Blast;
};
if ( $@ ) {
print STDERR ("\nThe bioperl distribution has not be installed.\n Read the installation instructions at the top of this script going pod2text halfwise.pl\nThe bioperl distribution can be found from http://bio.perl.org/\nThe halfwise script has been tested with 0.04.x releases\n");
exit(1);
}
}
$defopts = "-init wing -cace -cut 25 -aln 200 -quiet";
$half_db = "/somewhere/halfwise.db";
# this is for testing @sanger
#$half_db = "/nfs/disk21/birney/prog/wise2/perl/scripts/halfwise.db";
$hmmfetch = "hmmfetch";
$genewise = "genewisedb";
$hmmdb = "Pfam";
#
# The documentation is written as pod.
# to view it nicely use pod2text halfwise.pl
# or to make html use pod2html
#
=head1 NAME - halfwise
halfwise is a script designed to compare a large DNA sequence to Pfam
sensibly. It does the following steps:
=over
=item blast search
Compares the DNA sequence to a representive database of protein sequences
from Pfam
=item HMM retrieval
Retrieves the Pfam HMMs to compare to the DNA sequence using a liberal
threshold
=item GeneWise comparison
Retrieves those HMMs and uses genewisedb to compare the DNA sequence to the
Pfam HMMs.
=back
The blast search drastically reduces the number of HMMs needed to be considered
while not losing much sensitive due to a relatively comprehensive protein database
and a liberal cutoff
=head1 USAGE
Once everything is installed properely, using halfwise should be quite simple. Go
halfwise.pl
to get basic help and
halfwise.pl -help
for more detailed options.
The basic mode of running is just to take a DNA sequence file and write to stdout, like
halfwise.pl cosmid.fasta > cosmid.hlf
For more description of the options you can use with the genewisedb program (and this
includes a large number of output options) go
halfwise.pl -help
and also read the postscript documentation (wise2.ps) that came in this distribution
=head1 INSTALLATION
For halfwise to work you need a number of different software packages installed. This
means that halfwise is a very robust and working script (and didnt take long to write!)
but, you are going to have to hop around installing things first. Sorry!
Everything installs very easily! All the components required for halfwise can be found at
ftp://ftp.sanger.ac.uk/pub/birney/wise2/halfwise, so you can make a one stop shop!
You need
=over
=item Wise2.1.16 or higher level in the 2.1 release series
You should have it now, as you are reading this!
=item HMMer2.1.1 or higher
This is for the HMM retrieval system. Available from http://hmmer.wustl.edu or
the above ftp site (the HMMer2 directory in the Wise2 package does not have the
correct facilities).
=item bioperl central distribution, 0.04.2 or higher in the 0.04 release series
This is available from http://bio.perl.org or the above ftp site
=item The blast executables
available from http://ncbi.nlm.nih.gov or http://blast.wustl.edu/ (I did not make
these available from the ftp site as you probably have them already installed!)
=item Pfam and halfwise databases
available from the above ftp site
=back
For the software packages, follow the instructions for how to install them. Briefly
=over
=item Wise2.1.16
type 'make all' in the wise2.1.16 directory and then copy the contents of the bin
directory to /usr/local/bin or somewhere on your path
=item HMMer2.1.1
type './configure' followed by 'make' in the hmmer-2.1.1 directory and then 'make install'
(assuming you want to install in /usr/local/bin)
=item bioperl-0.04.2
It is a standard CPAN type module. Go 'perl Makefile.PL', 'make', 'make install' in the
bioperl-0.04.2 directory. you do not need to install the Compiled extension, but if you
want to use other parts of bioperl, you may need them, so there is no reason not to ;)
=back
For the databases, pick up the Pfam database and the halfwise.db. Put them somewhere where
you usually put databases. setdb or pressdb the halfwise.db ready for blast (depending
on what flavour of blast you are using - your database manager should know this). Also
the easiest thing to do is to setenv HMMERDB to the directory where the Pfam database is.
You then need to index the Pfam database going 'hmmindex Pfam'.
finally you need to adjust the variables at the top the halfwise script so that everything
works.
This all seems complicated but its not so bad. Here is a check list.
o pick up and install wise2.1.16
- make all in wise2.1.16
- cp bin/* /usr/local/bin
- setenv WISECONFIGDIR to xxx/wise2.1.16/wisecfg
o pick up and install hmmer2.1.1
- ./configure
- make
- make install
o pick up and install bioperl
- perl Makefile.PL
- make
- make install
o check you have blast installed
o pick up halfwise.db and Pfam
- setdb halfwise.db
- hmmindex Pfam
- setenv HMMERDB /somewhere/data/
o change the location of the databases etc
at the top of this script
If you have any problems, just email me <birney@sanger.ac.uk>
=cut
#
# Make sure halfwise is there...
#
$blast_db_dir = $ENV{'BLASTDB'};
if( ! -e $half_db && ! (defined $blast_db_dir && -e "$blast_db_dir/$half_db" )) {
die("The halfwise database [$half_db] has not been correctly indicated.\nPlease read the installation instructions at the top of this script\n");
}
$filename = shift; # dna sequence we hope!
if( !defined $filename ) {
print "halfwise <dna-sequence-fasta> genewisedb-options\n";
print "searches Pfam using initial blast followed by genewise\n";
print " The default genewise options are $defopts\n\n";
print "Genewisedb options are listed by going halfwise -help\n";
exit(1);
}
# if help - print the help out from genewisedb...
if( $filename =~ "-help" ) {
print "halfwise <dna-sequence-fasta> genewisedb-options\n";
print " The default genewise options are $defopts\nGenewisedb options\n\n";
open(GDB,"$genewise -help |");
while(<GDB>) {
print;
}
close(GDB) || die "Could not close pipe to genewisedb executable [$genewise]. Installation problem?";
exit(1);
}
$verbose = 0;
if( @ARGV > 0 ) {
$opts = join(' ',@ARGV);
} else {
$opts = $defopts;
}
$verbose && print STDOUT "Doing Blast...\n";
# make the blast output
# BLASTMAT=/nfs/disk100/pubseq/blastdb/
#$ENV{'BLASTMAT'} = "/nfs/disk100/pubseq/blastdb/";
if( system("blastx $half_db $filename -V=10000 -B=10000 > $filename.blx.$$") != 0 ) {
die "There blastx search did not complete correctly\n";
}
$verbose && print STDOUT "Done\n";
eval {
$blast = Bio::Tools::Blast->new(
-file => "$filename.blx.$$",
-parse => 1,
-signif => '1e-3',
-strict => 1,
);
};
if( $@ ) {
print "#No hits found in the blast search\n";
exit;
}
# ok ... now loop over the HSPs and print them out!
foreach $hit ( $blast->hits ) {
$name = $hit->name;
$verbose && print STDOUT "Reading $name\n";
if( !($name =~ /^([^\/]+)\//) ) {
warn("Bad hit name $name");
}
$acc = $1;
$hash{$acc} = 1;
}
# build minidb of the HMMs
open(TEMPDB,">$filename.dbhmm.$$");
$count = 0;
foreach $acc ( keys %hash ) {
$count++;
$verbose && print STDOUT "Loading $acc\n";
open(GETZ,"$hmmfetch $hmmdb $acc |");
while(<GETZ>) {
print TEMPDB $_;
}
close(GETZ);
}
close(TEMPDB);
$verbose && print STDOUT "Collected $count hmms\n";
# run genewisedb
open(GDB,"$genewise -pfam $filename.dbhmm.$$ -dnas $filename $opts |") || die "Could not open genewise - ERROR! $!";
while(<GDB>) {
print;
}
close(GDB) || die "Could not open genewise pipe (well close it anyway) $! $?";
unlink("$filename.dbhmm.$$");
unlink("$filename.blx.$$");
exit(0); # for james...
|