File: histocomp.pl

package info (click to toggle)
murasaki 1.68.6-6
links: PTS, VCS
area: main
in suites: stretch
size: 1,928 kB
ctags: 3,100
sloc: cpp: 16,010; perl: 8,365; makefile: 186
file content (226 lines) | stat: -rwxr-xr-x 5,458 bytes
parent folder | download | duplicates (5)
#!/usr/bin/perl

#Copyright (C) 2006-2008 Keio University
#(Kris Popendorf) <comp@bio.keio.ac.jp> (2006)
#
#This file is part of Murasaki.
#
#Murasaki is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#
#Murasaki is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with Murasaki.  If not, see <http://www.gnu.org/licenses/>.

use File::Basename;
use Getopt::Long;
use Pod::Usage;
#use Data::Dump qw{dump};

use strict;
my ($help,$man,$opt_prefix);

# Make the bundled modules (a "perlmodules" directory next to this script)
# loadable before "use Murasaki" is compiled.
BEGIN {
  unshift(@INC,(fileparse($0))[1].'perlmodules');
}
use Murasaki;

my $samples;
my $fn;
my $format="png"; # output format; --pdf or --format override it
my $lwd=3;        # line weight passed to R
my ($opt_log,$opt_clean,$opt_nofstats,$fstats,%avg,$drawAvg,$maxsamples,$nofn,$outfile,$opt_names,$opt_xlab,$opt_ylab,$opt_title);
$opt_log='xy';    # axes drawn on a log scale by default
my ($width,$height,$res)=(10,7,96); # values handed to the R graphics device

# Every option destination must be a reference. --xlab takes a string value
# just like --ylab (it was declared as a bare flag), and --height needs
# \$height (the plain value was passed, so the option was silently ignored).
GetOptions('help|?' => \$help, man => \$man,
           'log=s'=>\$opt_log, clean=>\$opt_clean, 'format=s'=>\$format,
           pdf=>sub{$format='pdf'}, 'lwd=f'=>\$lwd,
           'output=s'=>\$outfile, 'names=s'=>\$opt_names,
           'xlab=s'=>\$opt_xlab, 'ylab=s'=>\$opt_ylab,
           'res=f'=>\$res, 'width=f'=>\$width, 'height=f'=>\$height,
           'title=s'=>\$opt_title
          ) or pod2usage(1);

# Handle --man before the "no input files" check; otherwise a bare
# "histocomp.pl --man" only ever printed the short usage message.
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
pod2usage(1) if $help or $#ARGV<0;

# One short name per input file (getName strips the directory and everything
# from the first dot onwards).
my @names=getName(@ARGV);

# "rank" only when every input file name mentions rank; otherwise "buckets".
my $type=(grep {!/rank/i} @ARGV) ? "buckets":"rank";

# Default output file: next to the first input, named after all inputs.
$outfile||=(fileparse($ARGV[0]))[1].join("-",@names).".histogram.$type.$format";

# NOTE(review): this suffix pattern only matches a dot followed by a single
# character, so for extensions such as ".png" nothing is stripped and
# $basefile ends up equal to the output path. Presumably qr/\.[^.]*/ was
# intended -- confirm before changing, since it renames the .combined file.
my ($basename,$path,$suffix)=fileparse($outfile,qr/\.[^.]/);
$path=~s{/$}{}; #kill trailing / if any
my $basefile=join("/",$path,$basename);

print "Writing output to $basefile\n";

# Open every input histogram up front. A handle stays in %infh until its file
# reaches EOF, so the hash doubles as the set of files still being read.
my %infh;

foreach my $file (@ARGV){
  open($infh{$file},'<',$file) or die "Couldn't open $file: $!";
}

# Tab-separated table: a header row of names, then one row per input line
# holding the second column of each input file.
my $datafile="$basefile.combined";

if($opt_clean or !-f $datafile){
  print "Merging input files...\n";
  open(my $datafh,'>',$datafile) or die "Couldn't write $datafile: $!";
  print $datafh join("\t",@names)."\n";
  while(scalar(keys(%infh))){
    my @row;
    foreach my $file (@ARGV){
      my $good;
      if(defined($infh{$file})){
        my $line=readline($infh{$file});
        if(defined($line)){
          chomp $line;
          # only the second tab-separated field is plotted
          (undef,$good)=split(/\t/,$line);
        }else{
          # EOF: retire this input; it contributes "0" from now on
          print "$file done\n";
          my $finished=delete $infh{$file};
          close($finished);
        }
      }
      # exhausted inputs and empty/missing fields are padded with "0"
      push(@row,$good ? $good : "0");
    }
    # A row is written on every pass, including the pass in which the last
    # input reaches EOF, so the table ends with one all-zero row.
    print $datafh join("\t",@row)."\n";
  }
  # Close the handle itself (the file *name* used to be passed to close) and
  # check the result, because buffered write errors only surface here.
  close($datafh) or die "Couldn't finish writing $datafile: $!";
}else{
  print "Reusing existing data file\n";
}

# Legend labels: --names (comma separated) overrides the input-derived names.
my @legendTerms=($opt_names ? split(/,/,$opt_names):@names);
# R expression for the legend position; evaluated inside the R script.
my $legendpos="1,max(yl)/22+min(yl)";
# R call that opens the output device: pdf() for pdf, bitmap() otherwise.
my $outputter=$format ne 'pdf' ?
  qq!bitmap(file="$outfile",type="png16m",width=$width,height=$height,res=$res)!:
  qq!pdf(file="$outfile",width=$width,height=$height)!;
my $rsrc="$outfile.R";
my $title=$opt_title ? $opt_title:(join(" ",@names)." $type histogram");
my $xlab=$opt_xlab ? $opt_xlab:($type eq "rank" ? "Rank":"Bucket size");
my $ylab=$opt_ylab ? $opt_ylab:"Frequency";
# R plot type ('l' = lines). Kept in its own variable: this used to be a
# second "my $type" that masked the rank/buckets $type declared earlier.
my $plot_type='l';
my $pch="'o'";

# Write the R script.
open(my $R,'>',$rsrc) or die "Couldn't write $rsrc: $!";
print $R <<ENDTEXT;
$outputter
dat<-na.omit(read.delim('$datafile'));
xl<-c(1,dim(dat)[1]);
yl<-c(min(dat[dat!=0]),max(dat));
xl
yl
cat("$legendpos");
ENDTEXT

# One series per input file: the first creates the plot, the rest are added.
my @colors;
foreach my $i (1..(scalar(@ARGV))){
  # colours start at 2 when several series are compared; a lone series uses 1
  my $color=$#ARGV>0 ? $i+1 : 1;
  push(@colors,$color);

  if($i==1){
    print $R <<ENDTEXT;
plot(dat[,$i],type='$plot_type',xlim=xl,ylim=yl,col=$color,xlab='$xlab',ylab='$ylab',main='$title',log='$opt_log',lwd=$lwd,pch=$pch)
ENDTEXT
  }else{
    print $R <<ENDTEXT;
points(dat[,$i],col=$color,lwd=$lwd,pch=$pch,type='$plot_type')
ENDTEXT
  }
}

my $names=join(",",map {qq!"$_"!} @legendTerms);
my $cols=join(",",@colors);
print $R <<ENDTEXT;
legend($legendpos,c($names),col=c($cols),lwd=$lwd)
ENDTEXT
close($R) or die "Couldn't finish writing $rsrc: $!";

# Feed the script to R on its standard input, as before, but without going
# through a shell: the script name is derived from user-supplied paths and
# must not be interpreted as shell syntax.
open(STDIN,'<',$rsrc) or die "Couldn't read $rsrc: $!";
system('R','--vanilla')==0 or die "R did not complete successfully (status $?)\n";

exit(0);

# Return the arithmetic sum of all arguments (0 for an empty list).
sub sum {
  my $sum=0;
  # a plain loop: grep was being used in void context only for its side effect
  foreach my $value (@_){
    $sum+=$value;
  }
  return $sum;
}

# Return the arithmetic mean of the arguments.
# An empty argument list yields nothing (undef in scalar context) instead of
# dying with "Illegal division by zero".
sub mean {
  return unless @_;
  my $total=0;
  foreach my $value (@_){
    $total+=$value;
  }
  return $total/scalar(@_);
}

# Return the numerically smallest argument (undef when called with none).
sub min {
  my ($lowest,@others)=@_;
  for my $candidate (@others){
    $lowest=$candidate if $candidate<$lowest;
  }
  return $lowest;
}

# Return the numerically largest argument (undef when called with none).
sub max {
  my @values=@_;
  my $highest=$values[0];
  for my $idx (1..$#values){
    $highest=$values[$idx] if $values[$idx]>$highest;
  }
  return $highest;
}

# Interactive chooser.
#   $ps1  - heading printed once above the numbered options
#   $ps2  - prompt printed before every read from STDIN
#   @opts - the options to choose from
# Option 0 is shown in brackets as the default: an empty (or otherwise false)
# answer selects it. A true answer outside 0..$#opts causes a re-prompt.
# Returns the chosen element of @opts.
sub pickOne {
  my ($ps1,$ps2,@opts)=@_;
  print $ps1."\n";

  my $menu='';
  foreach my $idx (0..$#opts){
    my $label=$idx==0 ? "[$idx]" : " $idx ";
    $menu.=$label.": $opts[$idx]\n";
  }
  print $menu;

  my $answer;
  while(1){
    print $ps2;
    $answer=<STDIN>;
    chomp $answer;
    last unless $answer;
    last unless ($answer<0 or $answer>$#opts);
  }
  return $opts[$answer];
}

# Reduce each path to its bare name: the directory part and everything from
# the first dot of the file name onwards are stripped.
# Called with exactly one path it returns that single name; otherwise it
# returns the list of names.
sub getName {
  my @stems;
  foreach my $file (@_){
    my ($stem)=fileparse($file, qr{\..*});
    push(@stems,$stem);
  }
  return scalar(@_)==1 ? $stems[0] : @stems;
}

__END__

=head1 NAME

histocomp.pl -- plot multiple histograms on one graph

=head1 SYNOPSIS

histocomp.pl <input1> [input2 ...]

=head1 OPTIONS

Plot a couple histograms on one graph

 Other options:
--log can apply log scale to x or y or xy axes
--lwd can specify line weight
--format can specify output file format (default png)
--pdf set format to pdf
--output specify a different output (otherwise it's autonamed from the inputs)