# run_crawl.py
#
# Source: mpi4py 3.0.3 (Debian bullseye package, demo/futures/run_crawl.py).
# Demo of parallel URL fetching with mpi4py.futures.MPIPoolExecutor.
from __future__ import print_function
from __future__ import division

try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

from mpi4py.futures import MPIPoolExecutor

# Sample web sites to fetch concurrently in the demo below.
URLS = [
    'http://www.google.com/',
    'http://www.apple.com/',
    'http://www.ibm.com/',
    'http://www.slashdot.org/',
    'http://www.python.org/',
    'http://www.bing.com/',
    'http://www.facebook.com/',
    'http://www.yahoo.com/',
    'http://www.youtube.com/',
    'http://www.blogger.com/',
]

def load_url(url):
    """Fetch *url* and return a ``(url, body)`` pair.

    The body is the raw response payload (bytes on Python 3).
    """
    response = urlopen(url)
    body = response.read()
    return url, body

def test_crawl():
    """Download every URL in URLS with a 10-worker MPI pool and print sizes.

    Results are consumed as they complete (``unordered=True``); each line
    shows the URL and its payload size in KiB. A 10-second timeout applies
    to the whole map.
    """
    with MPIPoolExecutor(10) as executor:
        results = executor.map(load_url, URLS,
                               timeout=10, unordered=True)
        for url, content in results:
            size_kib = len(content) / (1 << 10)
            print('%-25s: %6.2f KiB' % (url, size_kib))

# Script entry point: run the crawl demo when executed directly
# (e.g. via ``mpiexec -n 1 python run_crawl.py``), not when imported.
if __name__ == '__main__':
    test_crawl()