# run_crawl.py
#
# Source: mpi4py 3.0.3 (Debian bullseye package, demo/futures/run_crawl.py).
# Demo of parallel URL fetching with mpi4py.futures.MPIPoolExecutor.
from __future__ import print_function
from __future__ import division

try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

from mpi4py.futures import MPIPoolExecutor

# Sample web sites to fetch concurrently in the demo below.
URLS = [
    'http://www.google.com/',
    'http://www.apple.com/',
    'http://www.ibm.com/',
    'http://www.slashdot.org/',
    'http://www.python.org/',
    'http://www.bing.com/',
    'http://www.facebook.com/',
    'http://www.yahoo.com/',
    'http://www.youtube.com/',
    'http://www.blogger.com/',
]

def load_url(url):
    """Fetch *url* and return a ``(url, body)`` pair.

    The body is the raw response payload (bytes on Python 3).
    """
    response = urlopen(url)
    body = response.read()
    return url, body

def test_crawl():
    """Download every URL in URLS with a 10-worker MPI pool and print sizes.

    Results are consumed as they complete (``unordered=True``); each line
    shows the URL and its payload size in KiB. A 10-second timeout applies
    to the whole map.
    """
    with MPIPoolExecutor(10) as executor:
        results = executor.map(load_url, URLS,
                               timeout=10, unordered=True)
        for url, content in results:
            size_kib = len(content) / (1 << 10)
            print('%-25s: %6.2f KiB' % (url, size_kib))

# Script entry point: run the crawl demo when executed directly
# (e.g. via ``mpiexec -n 1 python run_crawl.py``), not when imported.
if __name__ == '__main__':
    test_crawl()