File: decode.pl

package info (click to toggle)
libsereal-decoder-perl 4.005%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,952 kB
  • sloc: ansic: 8,105; perl: 5,782; sh: 25; makefile: 5
file content (361 lines) | stat: -rw-r--r-- 10,845 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
#!/usr/bin/perl -w

# This script is for testing Sereal decode speeds, with various
# generated test inputs (which are first encoded).  Sample usages:
#
# decode.pl --build --output=data.srl
#
# will (1) build a "graph" (a hash of small strings, really,
# which can be seen as an adjacency list representation of
# a graph, the vertex and its neighbors) of 1e5 vertices
# (2) decode the encoded blob 5 times (the 'graph', 1e5, and 5
# being the defaults).
#
# Other inputs types (--type=T) are
# aoi (array of int) (value == key)
# aoir (array of int) (value == randomly shuffled key)
# aof (array of float) (rand())
# aos (array of string) (value eq key)
# hoi (hash of int)
# hof (hash of float)
# hos (hash of string)
#
# The 'base' number of elements in each case is controlled by --elem=N.
# For the array and hash the number of elements is trivial, for the graph
# the total number of elements (in its hash-of-hashes) is O(N log N).
#
# The number decode repeats is controlled by --repeat_decode=N and --repeat_decode=N.
#
# The encode input needs to be built only once, the --output tells
# where to save the encoded blob.  The encode blob can be read back
# from the save file with --input, much faster, especially in the case
# of the graph input.

use strict;

use Time::HiRes;
use Sereal::Encoder;
use Sereal::Decoder;
use Getopt::Long;
use Fcntl qw[O_RDONLY O_WRONLY O_CREAT O_TRUNC];
use List::Util qw[shuffle];

sub MB () { 2 ** 20 }

my %Opt;
my @Opt = ('input=s', 'output=s', 'type=s', 'elem=f', 'build',
           'repeat_encode=i', 'repeat_decode=i',

           # If non-zero, will drop the minimum and maximum
           # values before computing statistics IF the number
           # of measurements is at least this limit.  So with
           # a value of 5 will leave 3 measurements.  Lowers
           # the stddev, should not affect avg/median (much).
           # Helpful in reducing cache effects.
           'min_max_drop_limit=i',

           'size');
my %OptO = map { my ($n) = /^(\w+)/; $_ => \$Opt{$n} } @Opt;
my @OptU = map { "--$_" } @Opt;

GetOptions(%OptO) or die "GetOptions: @OptU\n";

my $data;
my $blob;
my $size;
my $data_size;
my $blob_size;
my $dt;

if (defined $Opt{size}) {
    eval 'use Devel::Size qw[total_size]';
    if ($@) {
        die "$0: --size but Devel::Size=total_size not found\n";
    }
}

if (defined $Opt{build}) {
    die "$0: --input with --build makes no sense\n" if defined $Opt{input};
    $Opt{elem} //= 1e5;
} else {
    die "$0: --output without --build makes no sense\n" if defined $Opt{output};
    die "$0: --elem without --build makes no sense\n" if defined $Opt{elem};
    die "$0: Must specify either --build or --input\n" unless defined $Opt{input};
}
if (defined ($Opt{output})) {
    die "$0: --input with --output makes no sense\n" if defined $Opt{input};
}

$Opt{type} //= 'graph';
$Opt{repeat_encode} //= 1;
$Opt{repeat_decode} //= 5;
$Opt{min_max_drop_limit} //= 0;

my %TYPE = map { $_ => 1 } qw[aoi aoir aof aos hoi hof hos graph];

die "$0: Unexpected --type=$Opt{type}\n$0: Expected --type=@{[join('|', sort keys %TYPE)]}\n"
    unless exists $TYPE{$Opt{type}};

sub Times::new {
    my $t = Time::HiRes::time();
    my ($u, $s, $cu, $cs) = times();
    bless {
        wall => $t,
        usr  => $u,
        sys  => $s,
        cpu  => $u + $s,
        cusr => $cu,
        csys => $cs,
    }, $_[0];
}
sub Times::diff {
    die "Unexpected diff(@_)\n" unless ref $_[0] eq ref $_[1];
    bless { map { $_ => ($_[0]->{$_} - $_[1]->{$_}) } keys %{$_[0]} }, ref $_[0];
}
sub Times::wall { $_[0]->{wall} }
sub Times::usr  { $_[0]->{usr}  }
sub Times::sys  { $_[0]->{sys}  }
sub Times::cpu  { $_[0]->{cpu}  }
# times() can often sum just a tad higher than wallclock.
sub Times::pct { 100 * ($_[0]->cpu > $_[0]->wall ? 1 : $_[0]->cpu / $_[0]->wall) }

sub timeit {
    my $code = shift;
    my $t0 = Times->new();
    my @res = $code->(@_);
    my $t1 = Times->new();
    my $dt = $t1->diff($t0);
    return $dt;
}

sub __stats {
    # The caller is supposed to have done this sorting
    # already, but let's be wasteful and paranoid.
    my @v = sort { $a <=> $b } @_;
    my $min = $v[0];
    my $max = $v[-1];
    my $med = @v % 2 ? $v[@v/2] : ($v[@v/2-1] + $v[@v/2]) / 2;
    my $sum = 0;
    for my $t (@_) {
        $sum += $t;
    }
    my $avg = $sum / @_;
    my $sqsum = 0;
    for my $t (@_) {
        $sqsum += ($avg - $t) ** 2;
    }
    my $stddev = sqrt($sqsum / @_);
    return ( avg => $avg,
             stddev => $stddev,
             rstddev => $avg ? $stddev / $avg : undef,
             min => $min, med => $med, max => $max );
}

sub stats {
    my %stats;
    for my $k (qw(wall cpu)) {
        my @v = sort { $a <=> $b } map { $_->{$k} } @_;
        if ($Opt{min_max_drop_limit} > 0 &&
            @v >= $Opt{min_max_drop_limit}) {
            print "$k: dropping min and max ($v[0] and $v[-1])\n";
            shift @v;
            pop @v;
        }
        $stats{$k} = { __stats(@v) };
    }
    return %stats;
}

if (defined $Opt{build}) {
    print "building data\n";
    my $E;
    if ($Opt{type} eq 'graph') {
	print "building graph\n";
	my $V = $Opt{elem};
	$E = int($V * log($V)/log(2));
	printf("data of %d (%.1fM) vertices %d (%.1fM) edges\n",
	       $V, $V / MB, $E, $E / MB);
	$dt = timeit(
	    sub {
		for my $i (1..$E) {
		    my $a = int(rand($V));
		    my $b = int(rand($V));
		    $data->{$a}{$b}++;
		}
	    });
    } elsif ($Opt{type} eq 'aoi') {
	print "building aoi\n";
	$E = $Opt{elem};
	$dt = timeit(
	    sub {
		for my $i (1..$E) {
		    push @$data, $i;
		}
	    });
    } elsif ($Opt{type} eq 'aoir') {
	print "building aoir\n";
	$E = $Opt{elem};
	$dt = timeit(
	    sub {
		for my $i (shuffle 1..$E) {
		    push @$data, $i;
		}
	    });
    } elsif ($Opt{type} eq 'aof') {
	print "building aof\n";
	$E = $Opt{elem};
	$dt = timeit(
	    sub {
		for my $i (1..$E) {
		    push @$data, rand();
		}
	    });
    } elsif ($Opt{type} eq 'aos') {
	print "building aos\n";
	$E = $Opt{elem};
	$dt = timeit(
	    sub {
		for my $i (1..$E) {
		    push @$data, rand() . $$;
		}
	    });
    } elsif ($Opt{type} eq 'hoi') {
	print "building hoi\n";
	$E = $Opt{elem};
	$dt = timeit(
	    sub {
		for my $i (1..$E) {
		    $data->{$i} = $i;
		}
	    });
    } elsif ($Opt{type} eq 'hof') {
	print "building hof\n";
	$E = $Opt{elem};
	$dt = timeit(
	    sub {
		for my $i (1..$E) {
		    $data->{$i} = rand();
		}
	    });
    } elsif ($Opt{type} eq 'hos') {
	print "building hos\n";
	$E = $Opt{elem};
	$dt = timeit(
	    sub {
		for my $i (1..$E) {
		    $data->{$i} = "$i";
		}
	    });
    } else {
	die "$0: Unexpected type '$Opt{type}'\n";
    }
    printf("build %.2f sec %.2f usr %.2f sys %.2f cpu %3d%% (%.1f elements/sec)\n",
           $dt->wall, $dt->usr, $dt->sys, $dt->cpu, $dt->pct, $E / $dt->wall);
    if ($Opt{size}) {
	$dt = timeit(sub { $data_size = total_size($data);});
	printf("data size %d bytes (%.1fMB) %.1f sec\n",
	       $data_size, $data_size / MB, $dt->wall);
    }

    my $encoder = Sereal::Encoder->new;

    {
	print "encoding data\n";
        my @dt;
        for my $i (1..$Opt{repeat_encode}) {
            $dt = timeit(sub { $blob = $encoder->encode($data); });
            $blob_size = length($blob);
            printf("%d/%d: encode to %d bytes (%.1fMB) %.2f sec %.2f usr %.2f sys %.2f cpu %3d%% (%.1f MB/sec)\n",
                   $i, $Opt{repeat_encode}, $blob_size, $blob_size / MB, $dt->wall, $dt->usr, $dt->sys, $dt->cpu, $dt->pct,
                   $blob_size / (MB * $dt->wall));
            push @dt, $dt;
        }
        if (@dt) {
            my %stats = stats(@dt);
            for my $k (qw(wall cpu)) {
                my $avg = $stats{$k}{avg};
                printf("encode %-4s avg %.2f sec (%.1f MB/sec) stddev %.2f sec (%.2f) min %.2f med %.2f max %.2f\n",
                       $k,
                       $avg, $avg ? $blob_size / (MB * $avg) : 0, $stats{$k}{stddev}, $avg ? $stats{$k}{rstddev} : 0,
                       $stats{$k}{min}, $stats{$k}{med}, $stats{$k}{max});
            }
        }
    }

    if (defined $Opt{output}) {
	print "opening output\n";
	my $fh;
	sysopen($fh, $Opt{output}, O_WRONLY|O_CREAT|O_TRUNC)
	    or die qq[sysopen "$Opt{output}": $!\n];
	print "writing blob\n";
	$dt = timeit(
	    sub {
		syswrite($fh, $blob)
		    or die qq[syswrite "$Opt{otput}": $!\n] });
	$blob_size = length($blob);
	printf("wrote %d bytes (%.1f MB) %.2f sec  %.2f usr %.2f sys %.2f cpu %3d%% (%.1f MB/sec)\n",
	       $blob_size, $blob_size / MB, $dt->wall, $dt->usr, $dt->sys, $dt->cpu, $dt->pct,
               $blob_size / (MB * $dt->wall));
    }
} elsif (defined $Opt{input}) {
    print "opening input\n";
    my $fh;
    sysopen($fh, $Opt{input}, O_RDONLY) or die qq[sysopen "$Opt{input}": $!\n];
    print "reading blob\n";
    $dt = timeit(
	sub {
	    sysread($fh, $blob, -s $fh)
		or die qq[sysread "$Opt{input}": $!\n];
	});
    $blob_size = length($blob);
    printf("read %d bytes (%.1f MB) %.2f sec %.2f usr %.2f sys %.2f cpu %3d%% (%.1f MB/sec)\n",
	   $blob_size, $blob_size / MB, $dt->wall,  $dt->usr, $dt->sys, $dt->cpu, $dt->pct,
           $blob_size / (MB * $dt->wall));
}

my $decoder = Sereal::Decoder->new;

{
    print "decoding blob\n";
    $blob_size = length($blob);
    my @dt;
    for my $i (1..$Opt{repeat_decode}) {
	$dt = timeit(sub { $data = $decoder->decode($blob); });
	printf("%d/%d: decode from %d bytes (%.1fM) %.2f sec %.2f usr %.2f sys %.2f cpu %3d%% (%.1f MB/sec)\n",
	       $i, $Opt{repeat_decode}, $blob_size, $blob_size / MB,
	       $dt->wall, $dt->usr, $dt->sys, $dt->cpu, $dt->pct, $blob_size / (MB * $dt->wall));
	push @dt, $dt;
    }
    if (ref $data eq 'HASH') {
        printf("data is hashref of %d elements\n", scalar keys %{$data});
    } elsif (ref $data eq 'ARRAY') {
        printf("data is hashref of %d elements\n", scalar @{$data});
    } elsif (ref $data) {
        printf("data is ref of %s\n", ref $data);
    } else {
        printf("data is of unexpected type\n");
    }
    if (@dt) {
        my %stats = stats(@dt);
        for my $k (qw(wall cpu)) {
            my $avg = $stats{$k}{avg};
            printf("decode %-4s avg %.2f sec (%.1f MB/sec) stddev %.2f sec (%.2f) min %.2f med %.2f max %.2f\n",
                   $k,
                   $avg, $avg ? $blob_size / (MB * $stats{$k}{avg}) : 0, $stats{$k}{stddev}, $avg ? $stats{$k}{rstddev} : 0,
                   $stats{$k}{min}, $stats{$k}{med}, $stats{$k}{max});
        }
    }
    if ($Opt{size}) {
	$dt = timeit(sub { $data_size = total_size($data); });
	printf("data size %d bytes (%.1fMB) %.1f sec\n",
	       $data_size, $data_size / MB, $dt->wall);
    }
}

if ($Opt{size}) {
    if ($blob_size && $data_size) {
        printf("data size / blob size %.2f\n", $data_size / $blob_size);
    }
}

exit(0);