1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
#!/usr/bin/env perl

# list_matrices.pl
# by Boris Lenhard
#
# Lists the matrices stored in a TFBS FlatFileDir database, either as a
# summary table (default), as ID/name pairs (--id-only) or as full
# records (--verbose).
#
# See POD documentation for this script at the end of the file
#

use strict;

# Warnings are enabled here rather than with "-w" on the shebang line:
# on systems that pass everything after the interpreter path to env as a
# single argument, "#!/usr/bin/env perl -w" fails to start at all.
use warnings;

use Getopt::Long;    # for parsing command line arguments
use Pod::Usage;
use TFBS::DB::FlatFileDir;

# Get command line options - if you are curious how this
# works, check the Getopt::Long module documentation.
my ($database_dir, $id_only, $verbose, $help);

# GetOptions returns false when the command line could not be parsed
# (e.g. an unknown option); show the usage message instead of carrying on.
GetOptions(
    'help'       => \$help,
    'database=s' => \$database_dir,
    'id-only'    => \$id_only,
    'verbose'    => \$verbose,
) or pod2usage(2);

if ($help) {
    pod2usage(-exitstatus => 0, -verbose => 2);
}
elsif (!(defined $database_dir and length $database_dir)) {
    # Tested with defined/length rather than truth so that a directory
    # literally named "0" is still accepted.
    pod2usage(1);
}

# --verbose is documented to override --id-only. Resolve that once here so
# that the separator and footer logic below agrees with the per-matrix
# output when both switches are given.
$id_only = 0 if $verbose;

# Fail early with a readable message when the directory is missing.
-d $database_dir
    or die "Database directory '$database_dir' does not exist"
         . " or is not a directory\n";

# connect to FlatFileDir matrix database
# (there is a sample FlatFileDir matrix database directory
# examples/SAMPLE_FlatFileDir in the TFBS distribution package)
# Change this line if you want to use a different type of database
# (e.g. TFBS::DB::JASPAR2)
# NOTE(review): the "or die" only matters if connect() signals failure by
# returning a false value rather than throwing - confirm against TFBS.
my $db = TFBS::DB::FlatFileDir->connect($database_dir)
    or die "Could not open FlatFileDir database '$database_dir'\n";

# get all matrices (requested as "PFM" matrix type) into a
# TFBS::MatrixSet object
my $matrixset = $db->get_MatrixSet(-matrixtype => "PFM");

# print heading if normal output
unless ($id_only or $verbose) {
    printf("\n %-10s%-15s%-20s%10s%10s\n",
        'MatrixID', 'Name', 'Class', 'Length', 'Total IC');
}

# print line if normal or verbose output
print "-" x 70, "\n" unless $id_only;

# Iterate through the set and display ID and name
# (aggregate classes in TFBS - TFBS::MatrixSet, TFBS::SiteSet,
# TFBS::SitePairSet) are equipped with iterators that all follow
# the same syntax:)
my $mx_iterator = $matrixset->Iterator(-sort_by => 'ID');

while (my $pfm = $mx_iterator->next()) {    # for each matrix in the set
    if ($verbose) {
        # full record: identification, information content and the matrix
        print "\n", "-" x 65;
        print "\nMatrix ID : ", $pfm->ID;
        print "\nTranscription factor name : ", $pfm->name;
        print "\nStructural class : ", $pfm->class;
        print "\nTotal information content : ",
            sprintf("%2.2f", $pfm->to_ICM->total_ic);
        print "\nMatrix:\n", $pfm->prettyprint;
        print "", "-" x 65, "\n\n";
    }
    elsif ($id_only) {
        # terse output: matrix ID and name separated by a tab
        print $pfm->ID, "\t", $pfm->name, "\n";
    }
    else {
        # one table row per matrix; columns match the heading printed above
        printf(" %-10s%-15s%-20s%10s%10.2f\n",
            $pfm->ID, $pfm->name, $pfm->class,
            $pfm->length, $pfm->to_ICM->total_ic);
    }
}

# print the line for normal and verbose output
unless ($id_only) {
    print "-" x 70, "\nTotal ", $matrixset->size, " matrices.\n\n";
}

# The rest is usage message if the user requests help
# or fails to provide required parameters
__END__

=head1 NAME

list_matrices.pl - List info on matrix patterns stored in a flat file directory

=head1 SYNOPSIS

    ./list_matrices.pl -d <TFBS_matrix_dbase_dir> [other_options]

=head1 OPTIONS

=over 8

=item B<-d or --database> <directory name>

REQUIRED: Name of the FlatFileDir database directory to
use for retrieving matrices.
A sample database directory examples/SAMPLE_FlatFileDir
is available in the TFBS distribution.

=item B<-i or --id-only>

OPTIONAL: Prints only the ID and name of each matrix, separated by a tab,
one matrix per line.

=item B<-v or --verbose>

OPTIONAL: Prints full record (matrix and info). Overrides -i if set simultaneously.

=back

=head1 DESCRIPTION

This is an example script that displays information about matrix patterns stored
in a flat file directory-type database. Its source code is
meant to be studied by bioinformaticians who wish to learn how to
use TFBS modules.

=cut
|