File: test_s3_buffering.py

package info (click to toggle)
smart-open 7.5.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 980 kB
  • sloc: python: 8,054; sh: 90; makefile: 14
file content (23 lines) | stat: -rw-r--r-- 529 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from smart_open import open


def read_bytes(url, limit):
    """Read *limit* bytes from *url* one byte at a time.

    Deliberately issues ``read(1)`` calls so the benchmark measures the
    cost of smart_open's internal buffering, not bulk-read throughput.

    :param url: Location to open in binary mode.
    :param limit: Number of single-byte reads to perform.
    :returns: List of the bytes objects returned by each read.
    """
    with open(url, 'rb') as fin:
        collected = [fin.read(1) for _ in range(limit)]
    return collected


def test(benchmark):
    """Benchmark byte-at-a-time reads of a large (~850 MB) public S3 object.

    Uses the pytest-benchmark ``benchmark`` fixture to time ``read_bytes``
    against a Common Crawl WARC file.
    """
    url = (
        's3://commoncrawl/crawl-data/CC-MAIN-2019-51/segments/1575541319511.97'
        '/warc/CC-MAIN-20191216093448-20191216121448-00559.warc.gz'
    )
    # Each read(1) yields a one-byte bytes object, so the result length
    # must equal the number of reads requested.
    num_reads = 1000000
    result = benchmark(read_bytes, url, num_reads)
    assert len(result) == num_reads