File: parallelpi.py

package info (click to toggle)
ipyparallel 8.8.0-6
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 12,412 kB
  • sloc: python: 21,991; javascript: 267; makefile: 29; sh: 28
file content (69 lines) | stat: -rw-r--r-- 1,966 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""Calculate statistics on the digits of pi in parallel.

This program uses the functions in :file:`pidigits.py` to calculate
the frequencies of 2 digit sequences in the digits of pi. The
results are plotted using matplotlib.

To run, text files from https://www.super-computing.org/
must be available in the working directory of the IPython engines.
The actual filenames to be used can be set with the ``filestring``
variable below.

The dataset we have been using for this is the 200 million digit one here:
ftp://pi.super-computing.org/.2/pi200m/

and the files used will be downloaded if they are not in the working directory
of the IPython engines.
"""

from timeit import default_timer as clock

from matplotlib import pyplot as plt
from pidigits import (
    compute_two_digit_freqs,
    fetch_pi_file,
    plot_two_digit_freqs,
    reduce_freqs,
)

import ipyparallel as ipp

# Files with digits of pi (10m digits each)
DIGITS_PER_FILE = 10.0e6
filestring = 'pi200m.ascii.%(i)02dof20'
files = [filestring % {'i': i} for i in range(1, 21)]

# Connect to the IPython cluster and execute pidigits.py on every engine
c = ipp.Client()
c[:].run('pidigits.py')

# the number of engines
n = len(c)
# The parallel run hands out one file per engine, but only len(files) files
# exist. Cap the count so that the reported digit totals and throughput stay
# correct when the cluster has more engines than there are files.
n_files = min(n, len(files))
id0 = c.ids[0]
v = c[:]
v.block = True
# fetch the pi-files
print("downloading %i files of pi" % n_files)
v.map(fetch_pi_file, files[:n_files])
print("done")

# Run 10m digits on 1 engine (baseline for the speedup figure)
t1 = clock()
freqs10m = c[id0].apply_sync(compute_two_digit_freqs, files[0])
t2 = clock()
digits_per_second1 = DIGITS_PER_FILE / (t2 - t1)
print("Digits per second (1 core, 10m digits):   ", digits_per_second1)


# Run n_files*10m digits on all engines; the timing includes combining the
# per-file frequencies with reduce_freqs.
t1 = clock()
freqs_all = v.map(compute_two_digit_freqs, files[:n_files])
freqs_total = reduce_freqs(freqs_all)
t2 = clock()
digits_per_second_all = n_files * DIGITS_PER_FILE / (t2 - t1)
# "%i0m" prints the file count followed by a literal "0m", i.e. count * 10m.
print(
    "Digits per second (%i engines, %i0m digits): " % (n, n_files),
    digits_per_second_all,
)

print("Speedup: ", digits_per_second_all / digits_per_second1)

plot_two_digit_freqs(freqs_total)
plt.title("2 digit sequences in %i0m digits of pi" % n_files)
plt.show()