File: compare.shingle.benchmark.tables.pl

package info (click to toggle)
lucene-solr 3.6.2%2Bdfsg-24
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 91,200 kB
  • sloc: java: 465,555; xml: 24,939; javascript: 5,291; ruby: 3,453; jsp: 2,637; python: 1,619; sh: 1,556; perl: 1,407; cpp: 305; makefile: 39
file content (116 lines) | stat: -rw-r--r-- 3,948 bytes parent folder | download | duplicates (19)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/perl
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ------------------------------------------
# compare.shingle.benchmark.tables.pl
#
# Takes as cmdline parameters two JIRA-formatted benchmark results, as produced
# by shingle.bm2jira.pl (located in the same directory as this script), and
# outputs a third JIRA-formatted comparison table.
#
# The difference is calculated as a percentage:
#
#   100 * (unpatched-elapsed - patched-elapsed) / patched-elapsed
#
# where (un)patched-elapsed values have had the no-shingle-filter 
# (StandardAnalyzer) elapsed time subtracted from them.
#
#
# Example shingle.bm2jira.pl output:
# ----------------------------------
# JAVA:
# java version "1.5.0_15"
# Java(TM) 2 Runtime Environment, Standard Edition (build 1.5.0_15-b04)
# Java HotSpot(TM) 64-Bit Server VM (build 1.5.0_15-b04, mixed mode)
#
# OS:
# cygwin
# WinVistaService Pack 2
# Service Pack 26060022202561
#
# ||Max Shingle Size||Unigrams?||Elapsed||
# |1 (Unigrams)|yes|2.19s|
# |2|no|4.74s|
# |2|yes|4.90s|
# |4|no|5.82s|
# |4|yes|5.97s|

use strict;
use warnings;

# Exactly two command-line arguments are required, and each must name an
# existing plain file: the unpatched results first, the patched results second.
# Anything else aborts with the usage message.
my $usage = "Usage: $0 <unpatched-file> <patched-file>\n";

die $usage unless (@ARGV == 2 && -f $ARGV[0] && -f $ARGV[1]);

# Read the unpatched results file.
#
# Every line that precedes the table header is echoed to stdout unchanged, so
# the preamble of the unpatched file heads the comparison output.  After the
# header, each "|size|unigrams|N.NNs|" row is recorded: the row whose first
# column mentions "Unigrams" supplies the StandardAnalyzer (no shingle filter)
# baseline, and every other row is stored by max shingle size and unigram flag.
#
# "or" is used rather than "||" on the open: "||" binds more tightly than the
# comma operator, so it would attach to the filename string (always true) and
# a failed open would never reach the die.
open my $unpatched_fh, '<', $ARGV[0]
  or die "ERROR opening '$ARGV[0]': $!";
my $table_encountered = 0;
my $standard_analyzer_elapsed = 0;
my %unpatched_stats = ();
my %patched_stats = ();
while (my $line = <$unpatched_fh>) {
  if (!$table_encountered) {
    if ($line =~ /\Q||Max Shingle Size||Unigrams?||Elapsed||\E/) {
      $table_encountered = 1;
    } else {
      # Still in the preamble: pass it through verbatim.
      print $line;
    }
  } elsif ($line =~ /\|([^|]+)\|([^|]+)\|([\d.]+)s\|/) {
    my ($max_shingle_size, $output_unigrams, $elapsed) = ($1, $2, $3);
    if ($max_shingle_size =~ /Unigrams/) {
      $standard_analyzer_elapsed = $elapsed;
    } else {
      $unpatched_stats{$max_shingle_size}{$output_unigrams} = $elapsed;
    }
  }
}
close $unpatched_fh;

# Read the patched results file.
#
# Only lines shaped like a table row ("|size|unigrams|N.NNs|") are used; all
# other lines are ignored, so nothing from this file is echoed.  The row whose
# first column mentions "Unigrams" lowers the StandardAnalyzer baseline if its
# elapsed time is smaller than the one already recorded; every other row is
# stored by max shingle size and unigram flag.
#
# "or" is used rather than "||" on the open: "||" binds more tightly than the
# comma operator, so it would attach to the filename string (always true) and
# a failed open would never reach the die.
open my $patched_fh, '<', $ARGV[1]
  or die "ERROR opening '$ARGV[1]': $!";
while (my $line = <$patched_fh>) {
  if ($line =~ /\|([^|]+)\|([^|]+)\|([\d.]+)s\|/) {
    my ($max_shingle_size, $output_unigrams, $elapsed) = ($1, $2, $3);
    if ($max_shingle_size =~ /Unigrams/) {
      # Keep the smaller of the two baseline timings.
      $standard_analyzer_elapsed = $elapsed
         if ($elapsed < $standard_analyzer_elapsed);
    } else {
      $patched_stats{$max_shingle_size}{$output_unigrams} = $elapsed;
    }
  }
}
close $patched_fh;

# Emit the JIRA-formatted comparison table: one row for every
# (max shingle size, unigrams?) pair found in the unpatched results, ordered
# numerically by shingle size and then by the unigram flag.
#
# Improvement is the time saved by the patch as a percentage of the patched
# shingle-filter cost, where that cost is the patched elapsed time minus the
# StandardAnalyzer baseline:
#
#   100 * (unpatched - patched) / (patched - standard_analyzer)
print "||Max Shingle Size||Unigrams?||Unpatched||Patched||StandardAnalyzer||Improvement||\n";
for my $max_shingle_size (sort { $a <=> $b } keys %unpatched_stats) {
  for my $output_unigrams (sort keys %{$unpatched_stats{$max_shingle_size}}) {
    my $unpatched = $unpatched_stats{$max_shingle_size}{$output_unigrams};

    # Test the outer key first so that a missing row is not autovivified
    # into %patched_stats as a side effect of the lookup.
    my $patched = exists $patched_stats{$max_shingle_size}
                ? $patched_stats{$max_shingle_size}{$output_unigrams}
                : undef;
    unless (defined $patched) {
      warn "No patched result for max shingle size '$max_shingle_size', "
         . "unigrams '$output_unigrams'; skipping row\n";
      next;
    }

    # A patched time equal to the baseline leaves nothing to divide by, so
    # the percentage is undefined; report that instead of dying.
    my $shingle_cost = $patched - $standard_analyzer_elapsed;
    my $improvement
      = $shingle_cost != 0
      ? sprintf('%.1f%%', 100 * ($unpatched - $patched) / $shingle_cost)
      : 'N/A';

    # Data goes through %s rather than being interpolated into the format
    # string, so a stray '%' in a table cell cannot act as a directive.
    printf "|%s|%s|%ss|%ss|%ss|%s|\n",
           $max_shingle_size, $output_unigrams,
           $unpatched, $patched, $standard_analyzer_elapsed, $improvement;
  }
}