File: run_crawl.py

package info (click to toggle)
mpi4py 4.1.0-4
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,540 kB
  • sloc: python: 34,465; ansic: 16,475; makefile: 614; sh: 325; cpp: 193; f90: 178
file content (33 lines) | stat: -rw-r--r-- 755 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from urllib.request import urlopen

from mpi4py.futures import MPIPoolExecutor

# Sample sites to crawl concurrently; plain HTTP on purpose — the demo only
# measures response sizes, so redirects to HTTPS are followed transparently.
URLS = [
    "http://www.google.com/",
    "http://www.apple.com/",
    "http://www.ibm.com/",
    "http://www.slashdot.org/",
    "http://www.python.org/",
    "http://www.bing.com/",
    "http://www.facebook.com/",
    "http://www.github.com/",
    "http://www.youtube.com/",
    "http://www.blogger.com/",
]


def load_url(url):
    """Fetch *url* and return a ``(url, body)`` tuple, where *body* is bytes.

    The response is opened in a ``with`` block so the underlying socket is
    closed deterministically — the original version leaked the connection
    until garbage collection reclaimed the response object.
    """
    with urlopen(url) as response:
        return url, response.read()


def test_crawl():
    """Download every page in URLS in parallel and print each body's size.

    A pool of 10 MPI workers fetches the pages; results are consumed as
    they complete (``unordered=True``) with a 10-second overall timeout.
    """
    with MPIPoolExecutor(10) as executor:
        results = executor.map(load_url, URLS, timeout=10, unordered=True)
        for url, content in results:
            kib = len(content) / (1 << 10)
            print(f"{url:25s}: {kib:6.2f} KiB")


# Run the crawl only when executed as a script (e.g. under mpiexec),
# not when imported by an MPI worker process.
if __name__ == "__main__":
    test_crawl()