1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
|
package Bio::Graphics::Browser2::DataLoader::wiggle;
# $Id$
use strict;
use base 'Bio::Graphics::Browser2::DataLoader';
use File::Basename 'basename','dirname';
use IO::String;
my $HASBIGWIG;
sub start_load {
my $self = shift;
my $loader = Bio::Graphics::Wiggle::Loader::Nosample->new(
$self->data_path,
$self->track_name,
);
$loader->status_setter($self);
$self->wigloader($loader);
}
# we are doing nothing during load_line except letting the
# base class make a copy of the file in SOURCES. finish_load()
# will do all the work
sub load_line { }
sub finish_load {
my $self = shift;
my $wigloader = $self->wigloader;
my $sourcefh = IO::File->new($self->source_file,'<');
$self->set_status('indexing data');
$wigloader->load($sourcefh);
$self->set_status('creating config file');
$self->close_conf; # the featurefile loader will handle the conf for us
my $featurefiledata = $wigloader->featurefile('featurefile');
eval "require Bio::Graphics::Browser2::DataLoader::featurefile"
unless Bio::Graphics::Browser2::DataLoader::featurefile->can('new');
my $ff_loader = Bio::Graphics::Browser2::DataLoader::featurefile->new(
$self->track_name,
$self->data_path,
$self->conf_path,
$self->settings,
$self->loadid,
);
$ff_loader->open_conf;
$ff_loader->start_load;
my @ff_lines = split "\n",$featurefiledata;
$ff_loader->load_line($_) foreach @ff_lines;
$ff_loader->finish_load;
$ff_loader->close_conf;
$self->add_track($_) foreach $ff_loader->tracks;
$self->wigloader(undef); # to avoid memory leak
}
sub wigloader {
my $self = shift;
my $d = $self->{wigloader};
$self->{wigloader} = shift if @_;
return $d;
}
# overload wiggle loader so that we don't go to genome-wide statistics
# on very large files. Always explicitly calculate stats for file.
package Bio::Graphics::Wiggle::Loader::Nosample;
use base 'Bio::Graphics::Wiggle::Loader';
sub status_setter {
my $self = shift;
$self->{__status_setter} = shift;
}
sub set_status {
my $self = shift;
my $msg = shift;
my $ss = $self->{__status_setter} or return;
$ss->set_status($msg);
}
sub report_status {
my $self = shift;
return unless $self->{__linecount}++ % 10000 == 0;
$self->set_status("Indexed ",$self->{__linecount}-1," lines");
}
sub load {
my $self = shift;
$self->{__linecount} = 0;
$self->SUPER::load(@_);
}
sub minmax {
my $self = shift;
my ($infh,$bedline) = @_;
local $_;
my $transform = $self->get_transform;
my $seqids = ($self->current_track->{seqids} ||= {});
my $chrom = $self->{track_options}{chrom};
$self->set_status('calculating descriptive statistics');
my %stats;
if ($bedline) { # left-over BED line
my @tokens = split /\s+/,$bedline;
my $seqid = $tokens[0];
my $value = $tokens[-1];
$value = $transform->($self,$value) if $transform;
$stats{$seqid} ||= Statistics::Descriptive::Sparse->new();
$stats{$seqid}->add_data($value);
}
my $count;
while (<$infh>) {
last if /^track|fixedStep|variableStep/;
next if /^\#/;
my @tokens = split(/\s+/,$_) or next;
my $seqid = @tokens > 3 ? $tokens[0] : $chrom;
$self->set_status("chromosome $seqid: line $count") if $count++ % 1000 == 0;
my $value = $tokens[-1];
$value = $transform->($self,$value) if $transform;
$stats{$seqid} ||= Statistics::Descriptive::Sparse->new();
$stats{$seqid}->add_data($value);
}
for my $seqid (keys %stats) {
$seqids->{$seqid}{min} = $stats{$seqid}->min();
$seqids->{$seqid}{max} = $stats{$seqid}->max();
$seqids->{$seqid}{mean} = $stats{$seqid}->mean();
$seqids->{$seqid}{stdev} = $stats{$seqid}->standard_deviation();
}
}
sub process_bed {
my $self = shift;
$self->report_status;
$self->SUPER::process_bed(@_);
}
sub process_fixedline {
my $self = shift;
$self->report_status;
$self->SUPER::process_fixedline(@_);
}
sub process_variableline {
my $self = shift;
$self->report_status;
$self->SUPER::process_variableline(@_);
}
1;
|