File: custom_writer.pl

package info (click to toggle)
bioperl 1.7.8-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 35,788 kB
  • sloc: perl: 94,019; xml: 14,811; makefile: 20
file content (101 lines) | stat: -rw-r--r-- 3,171 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/perl

# Demonstrates the use of a SearchIO Blast parser and a SearchWriterI object
# for producing custom output of Blast hit data from a Blast report 
# input stream.
#
# Here we define a custom SearchWriterI object that ouputs just the data we want
# from each BLAST report.
# 
# NOTE: If you just want pick and choose which columns you want 
# in the output table, you don't need to create your own custom
# SearchWriterI implementation as we do here. HitTableWriter and HSPTableWriter
# are configurable as to what columns and order you want. 
# The hitwriter*.pl and hspwriter.pl examples in this directory
# illustrate this.
#
# For a complete list of columns, see the docs for these modules:
#   Bio::SearchIO::Writer::HitTableWriter
#   Bio::SearchIO::Writer::HSPTableWriter
#
# This example serves as an illustration of how to use the 
# SearchWriterI api and plug it in to a SearchIO parser,
# which you may want to do if you want to generate data column(s)
# not provided by the available writers.
#
# Usage:
#   STDIN:  stream containing one or more BLAST or PSI-BLAST reports.
#   STDOUT: none, but generates an output file "custom_writer.out"
#           containing tab-delimited data on a per-hit basis.
#   STDERR: Progress info.
#
# Author: Steve Chervitz <sac@bioperl.org>

package MyBlastWriter;

use strict;
use Bio::Root::Root;
use Bio::SearchIO::SearchWriterI;

use base qw( Bio::Root::Root Bio::SearchIO::SearchWriterI );

sub to_string {
    my ($self, $result, @args) = @_;
    my $str = '';

    my $hits_reported = 0;

    foreach my $hit($result->hits) {

      # If this is a PSI-BLAST report, only report novel hits
      if( $result->psiblast ) {
	# Note that we could have supplied this has a -HIT_FILTER function
	# when we defined our input SearchIO object. Then we wouldn't need 
	# to define a custom writer.
	next unless $hit->iteration > 1 and not $hit->found_again;
      }

      $hits_reported++;
      printf STDERR "$hit\n";

      $str .= sprintf "%s\t%d\t%s\t%d\t%.2f\t%d\t%.1e\t%d\t%d\t%d\t%d\t%s\n", 
	               $result->query_name, $result->query_length, $hit->name,
	               $hit->length, $hit->frac_identical('query'), $hit->length_aln,
	               $hit->expect, $hit->score, $hit->bits,
	               $hit->gaps('total'), $hit->num_hsps, $hit->iteration || '-';
    }

    printf STDERR "\n%d hits written\n", $hits_reported;

    $str;

}

package main;

#===================================================
# Start of script 
#===================================================

use strict;

use Bio::SearchIO;

select STDOUT; $|=1; 

my $in     = Bio::SearchIO->new( -format => 'blast', 
				 -fh => \*ARGV,
                                 -signif => 0.1 );
my $writer = MyBlastWriter->new();
my $out    = Bio::SearchIO->new( -format => 'blast',
				 -writer => $writer,
				 -file   => ">custom_writer.out" );

while ( my $result = $in->next_result() ) {
    printf STDERR "Report %d: $result\n", $in->result_count;
    $out->write_result($result);
}

printf STDERR "\n%d Results processed.\n", $in->result_count;
printf STDERR "Output sent to file: %s\n",  $out->file if $out->file;