File: histoplot.pl

package info (click to toggle)
murasaki 1.68.6-6
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,928 kB
  • ctags: 3,100
  • sloc: cpp: 16,010; perl: 8,365; makefile: 186
file content (156 lines) | stat: -rwxr-xr-x 4,341 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/perl

#Copyright (C) 2006-2008 Keio University
#(Kris Popendorf) <comp@bio.keio.ac.jp> (2006)
#
#This file is part of Murasaki.
#
#Murasaki is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#
#Murasaki is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with Murasaki.  If not, see <http://www.gnu.org/licenses/>.

use Getopt::Long;
use Pod::Usage;

use strict;

# Option state filled in by GetOptions below.
my ($help,$man,$title,%opts,$extra_cmd,$echo_on,$byrank,$nopercent,$clean,$scale,$notitle);

# Output image size in pixels; overridable with --xres/--yres or --size.
my ($xres,$yres)=(800,800);

# Default font: the first of these TrueType files that exists, otherwise none.
# A path given with --font replaces the default and is not checked here.
my $font;
for my $candidate ("/usr/share/fonts/truetype/ttf-bitstream-vera/Vera.ttf",
		   "/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans.ttf"){
  if(-e $candidate){
    $font=$candidate;
    last;
  }
}
my $fontsize;

my $opt_res=GetOptions('help|?' => \$help, man => \$man, 'title=s' => \$title,
		       'notitle'=>\$notitle,
		       'opt=s%'=>\%opts,'cmd=s'=>\$extra_cmd,"echo"=>\$echo_on,
		       "rank"=>\$byrank,"nopercent"=>\$nopercent,"clean"=>\$clean,
		       'font=s'=>\$font,'fontsize=f'=>\$fontsize,
		       'scale=s'=>\$scale,'xres=i'=>\$xres,'yres=i'=>\$yres,
		       'size=s'=>sub {
			 # Accepts "<width><non-digits><height>", e.g. 800x600.
			 # Validate into temporaries first so a bad value never
			 # overwrites the current resolution.
			 my ($width,$height)=$_[1]=~m/(\d+)\D+(\d+)/
			   or die "Bad size format ($_[1])";
			 ($xres,$yres)=($width,$height);
		       });

# Bad options or an explicit --help: short usage, exit status 1.
pod2usage(1) if $help or !$opt_res;
# --man is checked before the "no file given" test so the full manual can be
# read without supplying a histogram file; verbose level 2 prints all of the POD.
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
# A histogram file argument is mandatory for everything else.
pod2usage(1) unless @ARGV;


# The histogram data file is the first positional argument.
my ($file)=@ARGV;
die "File not found: $file" unless -f $file;
# File names ending in "histogram.details" force rank mode.
$byrank=1 if $file=~m/histogram\.details$/;

# Axis labels depend on the plot mode. Each label is declared exactly once:
# a second "my ... if $byrank" would create a fresh, undefined variable when
# rank mode is off and drop the default label.
my $xlabel=$byrank ? "rank" : "bucket size";
my $ylabel=$byrank ? "frequency" : "keys";

# Rank mode: plot "$file.rank" instead of the raw histogram. That file holds
# one "<rank>\t<value>" line per non-zero entry of the source, largest first.
# It is rebuilt when --clean is given or when it does not exist yet.
if($byrank){
  print "Histogram by rank selected...\n";
  my $srcfile=$file;
  $file="$file.rank";
  if($clean or !-e $file){ #gotta make it!
    print "Rebuilding histogram file by rank\n";
    my $total=0;
    my @src;
    open(my $src_fh,'<',$srcfile) or die "Can't read $srcfile: $!";
    while(my $line=<$src_fh>){
      # The value of interest is the last run of digits on the line.
      my @fields=split(/\D+/,$line);
      my $val=pop(@fields);
      next unless $val; # skip lines without a value, and zero counts
      $total+=$val;
      push(@src,$val);
    }
    close($src_fh);
    # scalar(@src) is the element count; $#src would be one too few.
    print "Sorting ".scalar(@src)." values...\n";
    @src=sort {$b <=> $a} @src;
    print "Writing $file...\n";
    open(my $rank_fh,'>',$file) or die "Can't write $file: $!";
    for my $idx (0..$#src){
      # Values are written as a fraction of the total unless --nopercent is set.
      # $total is non-zero here because only non-zero values were kept.
      my $val=$nopercent ? $src[$idx] : $src[$idx]/$total;
      my $rank=$idx+1; # ranks are 1-based
      print {$rank_fh} "$rank\t$val\n";
    }
    # Buffered write errors only show up at close time.
    close($rank_fh) or die "Error writing $file: $!";
    print "Done building rank file $file\n";
  }
}

# Font clause for the png terminal. It is emitted only when both a font and a
# font size are known; otherwise it is left empty.
my $fontset=($fontsize and $font) ? qq!font "$font" $fontsize! : '';

$title=$file unless $title;
# Every --opt key=value pair becomes one gnuplot "set key value" line.
my $extra_opts=join("\n",map 
		 {"set $_ $opts{$_}"} keys(%opts));
# Log-log plot of the data file, written to "$file.png".
my $gnuplot_cmds=<<ENDTEXT;
set logscale x
set logscale y
set format "10^{\%L}"
set xlabel "$xlabel"
set ylabel "$ylabel"
$extra_opts
$extra_cmd

#plot "$file" with linespoints
set terminal png transparent $scale size $xres,$yres $fontset enhanced
set output "$file.png"
plot "$file" with linespoints notitle
ENDTEXT

$gnuplot_cmds=qq!set title "$title"\n!.$gnuplot_cmds unless $notitle;
# Echo first so the commands are visible even when gnuplot cannot be started.
print $gnuplot_cmds if $echo_on;
open(my $gnuplot_fh,'|-','gnuplot') or die "Can't run gnuplot: $!";
print {$gnuplot_fh} $gnuplot_cmds;
# close() on a pipe waits for the child: $! is set for I/O errors, otherwise
# a false return means gnuplot exited with a non-zero status (held in $?).
close($gnuplot_fh)
  or die($! ? "Error closing pipe to gnuplot: $!"
	    : "gnuplot exited with status ".($?>>8));

__END__

=head1 NAME

histoplot.pl -- plots histograms

=head1 SYNOPSIS

histoplot.pl [--title=<title>] [--opt=<key=value,key=value...>] [--echo] <histogram file>

=head1 OPTIONS

=over 8

=item B<--title>
Sets an optional graph title. Default is histogram file name.

=item B<< --opt=<key=val>[,<key=val>...] >>
Sends additional "set" options to gnuplot. For example, say you want
to force the x axis to [1:1000], you can use --opt=xrange=[1:1000]

=item B<< --cmd=<cmd1>[;<cmd2>...] >> Sends extra arbitrary commands to
gnuplot.

=item B<--echo> Echos a copy of the commands sent to gnuplot.

=item B<--rank> Sort words into rank

=item B<--nopercent> Preserve raw frequency, not percent.

=item B<--clean> Forces a remake of the gnuplot data file (good if
you've switched to --nopercent)

=item B<--font> Specify a font file to use in the PNG (only takes effect when --fontsize is also given)

=item B<--fontsize> Just specify font size (and use Bitstream Vera, or DejaVu Sans if Vera is not installed, as font)

=item B<--scale> scale for default font family: tiny, small, medium, large, or giant

=back

=head1 DESCRIPTION

Draws histogram plots.

=cut