File: removespikes.pl

package info (click to toggle)
ganglia 3.6.0-7
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid, stretch
  • size: 6,484 kB
  • ctags: 3,880
  • sloc: ansic: 27,874; sh: 11,052; python: 6,695; makefile: 565; perl: 366; php: 126; xml: 28
file content (193 lines) | stat: -rwxr-xr-x 6,193 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/perl -w
#
# matapicos v2.2 - Vins Vilaplana <vins at terra dot es>
#
# Translated by Humberto Rossetti Baptista <humberto at baptista dot name>
# slight adjustments and code cleanup too :-)
#
# Changes:
#  - 2007/02/27 - knobi@knobisoft.de - Various changes:
#                    Add value-based chopping (-t value)
#                    Add analysis only mode (-a)
#                    Control verbose/debug output using -d and -v
#                    Add -h help option
#                    Move to using the Getopt::Std package
#                    Use "strict" mode
#  - 2006/01/12 - vins@terra.es - "$!" takes other values in some perl interpreters (e.g. FreeBSD 4.11-R). Thanks to Atle Veka!
#

use strict;
use File::Temp ();
use Getopt::Std;
# Command-line switches as parsed by Getopt::Std, keyed by option letter.
# In the spec string, -l and -t take an argument; a, d, h and v are flags.
my %opt;
getopts('adhl:t:v', \%opt);

# Storage shared by the passes over the XML dump.
my @dump;     # pieces of the current row, split on </v>
my %exp;      # number of values per "DS:exponent" bin
my @cols;     # not referenced in the visible code
my @dbak;     # not referenced in the visible code
my %tot;      # number of numeric values seen per DS
my %por;      # percentage of a DS's values that fall in each bin

# Scratch scalars used while reading the dump line by line.
my $linea;    # current line (possibly replaced by the previous row)
my $linbak;   # previous row, used as replacement for a chopped one
my $lino;     # loop index / scratch
my $cdo;      # set to 1 when the current row has been chopped
my $tresto;   # the "<row>..." part of the current line
my $tstamp;   # whatever precedes "<row>" on the current line
my $a;        # scratch
my $b;        # scratch
my $c;        # scratch
my $cont;     # number of rows chopped

# Output levels and run mode; all off until the switches are evaluated.
my $DEBUG   = 0;
my $ANALYZE = 0;
my $VERBOSE = 0;

# Bin-based chopping limit, expressed in percent: 0.6 means 0.6%
# (not 0.006%).  A peak whose bin holds less than this share is cut.
my $LIMIT = 0.6;

# Threshold for value-based chopping ("-t").  The default is simply a very
# large number so nothing exceeds it unless the user supplies a value.
my $THRESH = 1.01e300;

# True while bin-based chopping is in effect, false for threshold-based
# chopping.
my $BINNING = 1;

# Print the usage summary and stop when help was requested (-h) or when no
# database file was named on the command line.
# An explicit -h is a successful run (exit status 0); a missing database
# argument is a usage error and exits with status 1 so that calling scripts
# can tell the two apart.
if ($opt{h} || !@ARGV) {
   print <<"END_USAGE";
REMOVESPIKES: Remove spikes from RRDtool databases.

Usage:
$0 [-d] [-v] [-a] [-h] [-l number] [-t maxval] name_of_database

Where:
  -d enables debug messages (implies -v)
  -a runs only the analysis phase of the script
  -h prints this message
  -l sets the % limit of spikes bin-based chopping (default: $LIMIT)
  -t sets the value above which records are chopped. Disabled by default.
     Enabling value-based chopping will disable bin-based chopping
  -v Verbose mode. Shows some information

  name_of_database is the rrd file to be treated.
END_USAGE
   exit($opt{h} ? 0 : 1);
}

# Translate the parsed command-line switches into the script's settings.

# -d: debug output, which also switches on verbose output.
if ($opt{d}) {
   $DEBUG = 1;
   $VERBOSE = 1;
   print "Enabling DEBUG mode\n";
}

# -a: stop after the analysis phase.
if ($opt{a}) {
   $ANALYZE = 1;
   print "Running in ANALYZE mode\n";
}

# -v: verbose output.
if ($opt{v}) {
   $VERBOSE = 1;
   print "Running in VERBOSE mode\n";
}

# -l and -t carry a numeric argument.  They are tested for definedness, not
# truth, so an explicit value of 0 (e.g. "-t 0") is honoured instead of being
# silently ignored.  Arguments that are not plain decimal numbers are
# rejected up front rather than being compared as 0 later on.
if (defined $opt{l}) {
   die "$0: -l expects a number, got '$opt{l}'\n"
      unless $opt{l} =~ /\A[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\z/;
   $LIMIT=$opt{l};
   print "Limit for bin-based chopping set to $LIMIT\n" if $VERBOSE;
}

# Supplying -t switches from bin-based to value-based chopping.
if (defined $opt{t}) {
   die "$0: -t expects a number, got '$opt{t}'\n"
      unless $opt{t} =~ /\A[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\z/;
   $THRESH=$opt{t};
   $BINNING=0;
   printf("Max Value set to %g, disabling bin-based chopping\n",$THRESH) if $VERBOSE;
}

# temporary filename:
# Created by File::Temp with a random suffix and exclusive-create semantics,
# so many users can run this script simultaneously and the name cannot be
# prepared in advance (e.g. as a symlink) by someone else sharing the
# temporary directory.  The file is not removed automatically; the end of the
# script unlinks it.
my ($tempfh, $tempfile) =
   File::Temp::tempfile("matapicos.dump.XXXXXX", TMPDIR => 1);

###########################################################################
# Dump the rrd database to the temporary file (as XML)
# rrdtool is started through a list-form pipe open, i.e. without a shell, so
# a database name containing spaces or shell metacharacters is handed over
# verbatim.  Its output is copied into the temporary file.
my $dump_error = '';
if (open(my $dumpfh, '-|', 'rrdtool', 'dump', $ARGV[0])) {
   while (my $xml = <$dumpfh>) {
      unless (print {$tempfh} $xml) {
         $dump_error = "Cannot write to $tempfile: $!";
         last;
      }
   }
   # close() on a pipe waits for rrdtool; it fails if rrdtool exited with a
   # non-zero status, in which case $! is 0 and $? holds the wait status.
   unless (close($dumpfh)) {
      $dump_error ||= $! ? "Error reading the output of rrdtool dump: $!"
                         : "rrdtool dump $ARGV[0] failed (wait status $?)";
   }
}
else {
   $dump_error = "Cannot run rrdtool dump: $!";
}
unless (close($tempfh)) {
   $dump_error ||= "Cannot write to $tempfile: $!";
}
if ($dump_error) {
   unlink($tempfile);            # do not leave a partial dump behind
   die "$0: $dump_error\n";
}

# Scan the XML dump checking the variations and exponent deviations
# First pass: for every data source (DS) column of every row, count how many
# values carry each decimal exponent (%exp, keyed "DS:exponent") and how many
# numeric values the column holds in total (%tot, keyed by the two-character
# DS index).  The exponent's sign character is skipped by the pattern, so
# only its digits form the bin key.
# The dump is opened with three-argument open on a lexical handle, so the
# file name can never be taken for an open mode.
open(my $scanfh, '<', $tempfile)
   or die "$0: Cannot open file $tempfile:\n $!";

while (my $line = <$scanfh>) {
  chomp $line;
  if ($line=~/^(.*)<row>/) { $tstamp=$1; }      # text in front of the row
  if ($line=~/(<row>.*)$/) { $tresto=$1; }      # the row itself
  if ($line=~/<v>\s\d\.\d+e.(\d+)\s<\/v>/) {    # row holds at least one number
    @dump = split(/<\/v>/, $tresto);
    # The last piece is what follows the final </v>, so it is not a DS.
    for my $col (0 .. $#dump-1) {               # scans DS's within each row
      if ( $dump[$col]=~/\d\.\d+e.(\d+)\s/ ) {  # make sure it is a number (and not NaN)
        my $ds = substr("0$col",-2);            # DS index, two characters
        $exp{"$ds:$1"}++;                       # store exponents
        $tot{$ds}++;                            # and keep a per DS total
      }
    }
  }
}

close $scanfh;

###########################################################################
# Turn the per-bin counts into percentages: for each "DS:exponent" bin, %por
# receives the share (in %) of that DS's numeric values that fall in the bin.
for my $bin (sort keys %exp) {
  my ($ds) = $bin =~ /^(\d+)\:/;        # DS part of the bin key
  my $scaled = 100 * $exp{$bin};
  $por{$bin} = $scaled / $tot{$ds};
}

if ($DEBUG || $ANALYZE) {
   # Dumps percentages for debugging purposes, one line per bin:
   #   DS:exponent--count/total = percentage%
   print "--percentages--\n";
   foreach my $bin (sort keys %exp) {
     my ($ds) = $bin =~ /^(\d+)\:/;     # DS part of the bin key
     print $bin."--".$exp{$bin}."/".$tot{$ds}." = ".$por{$bin}."%\n";
   }
   print "---------------\n\n";
   if ($ANALYZE) {
      # Analysis-only runs end here; remove the XML dump first so it is not
      # left behind in the temporary directory.
      unlink($tempfile);
      exit;
   }
}


###########################################################################
# Open the XML dump, and create a new one removing the spikes:
# Second pass.  Every line is copied to "$tempfile.xml".  A row containing a
# spike is replaced by the previous row (keeping its own leading text), and
# $cont counts the rows replaced.  The first row is never replaced because
# there is no previous row to substitute.
# Both files use three-argument open on lexical handles, and every write as
# well as the final close is checked, so a full disk cannot yield a silently
# truncated XML file that would later be restored over the database.
open(my $infh, '<', $tempfile) or
   die "$0: Cannot open $tempfile for reading: $!";
my $outfile = "$tempfile.xml";
my $outfh;
unless (open($outfh, '>', $outfile)) {
   my $why = "$!";
   close $infh;
   unlink($tempfile);
   die "$0: Cannot open $outfile for writing: $why";
}

my $write_error;
$linbak='';
$cont=0;
while (<$infh>) {
  chomp;
  $linea=$_;
  $cdo=0;
  if ($linea=~/^(.*)<row>/) { $tstamp=$1; }     # Grab timestamp
  if ($linea=~/(<row>.*)$/) { $tresto=$1; }     # grab rest-of-line :-)
  if (/<v>\s\d\.\d+e.(\d+)\s<\/v>/) {           # are there DS's?
    @dump=split(/<\/v>/, $tresto);              # split them
    if ($linbak ne '') {
      for my $col (0 .. $#dump-1) {             # for each DS:
        # grab number (and not a NaN): $1 is the value including a leading
        # minus sign, $2 the exponent digits used in the bin key
        next unless $dump[$col]=~/(-?\d\.\d+e.(\d+))\s/;
        my ($value,$exponent)=($1,$2);
        my $bin=substr("0$col",-2).":$exponent";
        # Binning mode: the bin holds less than $LIMIT percent of this DS.
        # Threshold mode: the signed value is larger than $THRESH.
        my $spike = $BINNING ? ($por{$bin} < $LIMIT)
                             : ($value > $THRESH);
        if ($spike) {
          $linea=$tstamp.$linbak;               # we dump it
          $cdo=1;
          $tresto=$linbak;
          last;                                 # one spike is enough to replace the row
        }
      }
    }
    $linbak=$tresto;
    if ($cdo==1) {
      print "Chopping peak at $tstamp\n" if $DEBUG;
      $cont++;
    }
  }

  unless (print {$outfh} "$linea\n") {
    $write_error="$!";
    last;
  }
}
close $infh;
# Buffered write errors may only surface when the handle is closed.
if (!close($outfh) && !defined $write_error) {
  $write_error="$!";
}
if (defined $write_error) {
  unlink($tempfile, $outfile);
  die "$0: Cannot write $outfile: $write_error";
}

###########################################################################
# Cleanup and move new file to the place of original one
# and original one gets backed up.
if ($cont == 0) {
  # Nothing was chopped: the database is left untouched whether or not
  # verbose output was requested; only the message depends on -v.
  print "No peaks found.!\n" if $VERBOSE;
}
else {
  my $rrdfile = $ARGV[0];
  my $backup  = "$rrdfile.old";

  # Move the original out of the way before restoring under its name.
  unless (rename($rrdfile, $backup)) {
    my $why = "$!";
    unlink($tempfile, "$tempfile.xml");
    die "$0: Unable to back up $rrdfile as $backup - $why\n";
  }

  # List-form system(): rrdtool is run without a shell, so the file names
  # are passed verbatim.  A return value of -1 means it could not be started.
  my $rc = system('rrdtool', 'restore', "$tempfile.xml", $rrdfile);
  if ($rc != 0) {
    my $why = ($rc == -1) ? "cannot run rrdtool: $!"
                          : "rrdtool wait status $?";
    # Put the original database back so a failed restore loses nothing.
    rename($backup, $rrdfile)
      or warn "$0: the original database is still available as $backup\n";
    unlink($tempfile, "$tempfile.xml");
    die "$0: Unable to execute the rrdtool restore on $rrdfile - $why\n";
  }
}

# cleans up the files created
unlink("$tempfile");
unlink("$tempfile.xml");