File: factor-gnufactor.pl

package info (click to toggle)
libmath-prime-util-perl 0.73-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,796 kB
  • sloc: perl: 24,676; ansic: 11,471; makefile: 26; python: 24
file content (198 lines) | stat: -rwxr-xr-x 6,363 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env perl
use strict;
use warnings;
use Math::Prime::Util qw/factor srand urandomm/;
use File::Temp qw/tempfile/;
use Math::BigInt try => 'GMP,Pari';
use Config;
use autodie;
use Text::Diff;
use Time::HiRes qw(gettimeofday tv_interval);
my $maxdigits = 100;
$| = 1;  # fast pipes
srand(87431);
my $num = 1000;
my $semiprimes = 0;

# Note: If you have factor from coreutils 8.20 or later (e.g. you're running
# Fedora), then GNU factor will be very fast and support at least 128-bit
# inputs (~44 digits).  Its growth is not great however, so 25+ digits starts
# getting slow.  The authors wrote on a forum that a future version will
# include a TinyQS, which should make it really rock for medium-size inputs.
#
# On the other hand, if you have the older factor (e.g. you're running
# Ubuntu) then GNU factor uses trial division so will be very painful for
# large numbers.  You'll probably want to turn it off here as it will be
# many thousands of times slower than MPU and Pari.

# A benchmarking note: in this script, getting MPU and Pari results are done
# by calling a function, where getting GNU factor results are done via
# multiple shells to /usr/bin/factor with the inputs as command line
# arguments.  This adds a lot of overhead that has nothing to do with their
# implementation.  For comparison, I've included an option for getting MPU
# factoring via calling the factor.pl script.  Weep at the startup cost.

my $do_gnu = 1;
my $do_pari = 1;
my $use_mpu_factor_script = 0;

if ($do_pari) {
  $do_pari = 0 unless eval { require Math::Pari; Math::Pari->import(); 1; };
}

{ # Test from 2 to 10000
  print "    2 -  1000"; test_array(    2 ..  1000);
  print " 1001 -  5000"; test_array( 1001 ..  5000);
  print " 5001 - 10000"; test_array( 5001 .. 10000);
}

foreach my $digits (5 .. $maxdigits) {
  printf "%5d %2d-digit numbers", $num, $digits;
  my @narray = gendigits($digits, $num);
  test_array(@narray);
  $num = int($num * 0.9) + 1; # reduce as we go
}

sub test_array {
  my @narray = @_;
  my($start, $mpusec, $gnusec, $parisec, $diff);
  my(@mpuarray, @gnuarray, @pariarray);

  print ".";
  $start = [gettimeofday];
  @mpuarray = mpu_factors(@narray);
  $mpusec = tv_interval($start);

  if ($do_gnu) {
    print ".";
    $start = [gettimeofday];
    @gnuarray = gnu_factors(@narray);
    $gnusec = tv_interval($start);
  }

  if ($do_pari) {
    print ".";
    $start = [gettimeofday];
    @pariarray = pari_factors(@narray);
    $parisec = tv_interval($start);
  }

  print ".";
  die "MPU got ", scalar @mpuarray, " factors.  GNU factor got ",
      scalar @gnuarray, "\n" unless !$do_gnu || $#mpuarray == $#gnuarray;
  die "MPU got ", scalar @mpuarray, " factors.  Pari factor got ",
      scalar @pariarray, "\n" unless !$do_pari || $#mpuarray == $#pariarray;
  foreach my $n (@narray) {
    my @mpu  = @{shift @mpuarray};
    die "mpu array is for the wrong n?" unless $n == shift @mpu;
    if ($do_gnu) {
      my @gnu  = @{shift @gnuarray};
      die "gnu array is for the wrong n?" unless $n == shift @gnu;
      $diff = diff \@mpu, \@gnu, { STYLE => 'Table' };
      die "factor($n): MPU/GNU\n$diff\n" if length($diff) > 0;
    }
    if ($do_pari) {
      my @pari = @{shift @pariarray};
      die "pari array is for the wrong n?" unless $n == shift @pari;
      my $diff = diff \@mpu, \@pari, { STYLE => 'Table' };
      die "factor($n): MPU/Pari\n$diff\n" if length($diff) > 0;
    }
  }
  print ".";
  # We should ignore the small digits, since we're comparing direct
  # Perl functions with multiple command line invocations.  It really
  # doesn't make sense until we're over 1ms per number.
  printf "  MPU:%8.4f ms", (($mpusec*1000) / scalar @narray);
  printf("  GNU:%8.4f ms", (($gnusec*1000) / scalar @narray)) if $do_gnu;
  printf("  Pari:%8.4f ms", (($parisec*1000) / scalar @narray)) if $do_pari;
  print "\n";
}

sub gendigits {
  my $digits = shift;
  die "Digits must be > 0" unless $digits > 0;
  my $howmany = shift;
  my ($base, $max);

  if ( 10**$digits < ~0) {
    $base = ($digits == 1) ? 0 : int(10 ** ($digits-1));
    $max = int(10 ** $digits);
    $max = ~0 if $max > ~0;
  } else {
    $base = Math::BigInt->new(10)->bpow($digits-1);
    $max = Math::BigInt->new(10)->bpow($digits) - 1;
  }
  my @nums;
  if ($semiprimes) {
    # This is a lousy way to do it.  We should generate a half-size prime, then
    # generate a prime whose product with the first falls in range.  Or even
    # just two half-size until the product is in range.
    for (1.. $howmany) {
      my $c;
      while (1) {
        $c = $base + urandomm($max-$base);
        my @f = factor($c);
        next if scalar(@f) != 2;
        last if $digits < 8;
        last if $digits < 12 && $f[0] > 1000;
        last if $digits < 16 && $f[0] > 100000;
        last if                 $f[0] > 10000000;
      }
      push @nums, $c;
    }
  } else {
    @nums = map { $base + urandomm($max-$base) } (1 .. $howmany);
  }
  #for (@nums) { print "$_ [",join(" ",factor($_)),"]   " }
  return @nums;
}

sub mpu_factors {
  my @piarray;

  if (!$use_mpu_factor_script) {
    push @piarray, [$_, factor($_)] for @_;
  } else {
    my @ns = @_;
    my $numpercommand = int( (4000-30)/(length($ns[-1])+1) );
    while (@ns) {
      my $cs = join(" ", 'perl -Iblib/lib -Iblib/arch bin/factor.pl', splice(@ns, 0, $numpercommand));
      my $fout = qx{$cs};
      my @flines = split(/\n/, $fout);
      foreach my $fline (@flines) {
        $fline =~ s/^(\d+): //;
        push @piarray, [$1, split(/ /, $fline)];
      }
    }
  }
  @piarray;
}

sub gnu_factors {
  my @ns = @_;
  my @piarray;
  my $numpercommand = int( (4000-30)/(length($ns[-1])+1) );

  while (@ns) {
    my $cs = join(" ", '/usr/bin/factor', splice(@ns, 0, $numpercommand));
    my $fout = qx{$cs};
    my @flines = split(/\n/, $fout);
    foreach my $fline (@flines) {
      $fline =~ s/^(\d+): //;
      push @piarray, [$1, split(/ /, $fline)];
    }
  }
  @piarray;
}

sub pari_factors {
  my @piarray;
  foreach my $n (@_) {
    my @factors;
    my ($pn,$pc) = @{Math::Pari::factorint($n)};
    # Map the Math::Pari objects returned into Math::BigInts, because Pari will
    # throw a hissy fit later when we try to compare them to anything else.
    push @piarray, [ $n, map { (Math::BigInt->new($pn->[$_])) x $pc->[$_] } (0 .. $#$pn) ];
  }
  @piarray;
}