File: parsing_benchmark.py

package info (click to toggle)
python-tornado 6.2.0-3%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 3,396 kB
  • sloc: python: 27,837; javascript: 156; sh: 99; ansic: 58; xml: 49; makefile: 48; sql: 23
file content (112 lines) | stat: -rw-r--r-- 3,085 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python
import re
import timeit
from enum import Enum
from typing import Callable

from tornado.httputil import HTTPHeaders
from tornado.options import define, options, parse_command_line


# Command-line options, registered through tornado.options.
# --benchmark: name of the benchmark to run; main() looks it up in the
# Benchmark enum ("headers-split" or "headers-full").
define("benchmark", type=str)
# --num_runs: how many times main() invokes the selected benchmark.
define("num_runs", type=int, default=1)


# Line-terminator pattern: a "\n" optionally preceded by "\r", so both CRLF
# and bare LF end a line.
_CRLF_RE = re.compile(r"\r?\n")
# Sample header block passed to every benchmark in this file. Each header is
# terminated by "\r\n"; the long Accept and User-Agent values are written as
# adjacent string literals, which Python concatenates into a single line.
_TEST_HEADERS = (
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,"
    "image/apng,*/*;q=0.8,application/signed-exchange;v=b3\r\n"
    "Accept-Encoding: gzip, deflate, br\r\n"
    "Accept-Language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7\r\n"
    "Cache-Control: max-age=0\r\n"
    "Connection: keep-alive\r\n"
    "Host: example.com\r\n"
    "Upgrade-Insecure-Requests: 1\r\n"
    "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36\r\n"
)


def headers_split_re(headers: str) -> None:
    """Split ``headers`` into lines using the precompiled ``_CRLF_RE`` pattern.

    This is the regex half of the split benchmark: the lines are iterated and
    discarded, and nothing is returned.
    """
    for _unused_line in _CRLF_RE.split(headers):
        # Intentionally empty: only the split and the iteration are exercised.
        pass


def headers_split_simple(headers: str) -> None:
    """Split ``headers`` on ``"\\n"`` and drop a trailing ``"\\r"`` from each line.

    This is the ``str.split`` half of the split benchmark: the stripped lines
    are discarded, and nothing is returned.
    """
    for raw_line in headers.split("\n"):
        if raw_line.endswith("\r"):
            # Result is deliberately unused; the slice is part of the
            # workload being timed.
            raw_line = raw_line[:-1]


def headers_parse_re(headers: str) -> HTTPHeaders:
    """Parse ``headers`` into an ``HTTPHeaders``, splitting lines with ``_CRLF_RE``.

    Empty lines (such as the empty string left after a trailing line
    terminator) are skipped; every other line is passed to
    ``HTTPHeaders.parse_line``.
    """
    parsed = HTTPHeaders()
    for header_line in _CRLF_RE.split(headers):
        if header_line:
            parsed.parse_line(header_line)
    return parsed


def headers_parse_simple(headers: str) -> HTTPHeaders:
    """Parse ``headers`` into an ``HTTPHeaders``, splitting lines with ``str.split``.

    Lines are split on ``"\\n"`` and a single trailing ``"\\r"`` is removed from
    each. Lines that are empty after that are skipped; every other line is
    passed to ``HTTPHeaders.parse_line``.
    """
    parsed = HTTPHeaders()
    for header_line in headers.split("\n"):
        # Turn a CRLF-terminated line into its bare content.
        if header_line.endswith("\r"):
            header_line = header_line[:-1]
        if header_line:
            parsed.parse_line(header_line)
    return parsed


def run_headers_split():
    """Time regex splitting against ``str.split`` on ``_TEST_HEADERS``.

    Prints the elapsed time of each variant, then the ratio of the regex time
    to the ``str.split`` time.
    """
    iterations = 100000

    elapsed_regex = timeit.timeit(
        lambda: headers_split_re(_TEST_HEADERS), number=iterations
    )
    print("regex", elapsed_regex)

    elapsed_simple = timeit.timeit(
        lambda: headers_split_simple(_TEST_HEADERS), number=iterations
    )
    print("str.split", elapsed_simple)

    # A value above 1 means the str.split variant was faster.
    print("speedup", elapsed_regex / elapsed_simple)


def run_headers_full():
    """Time full header parsing with regex splitting against ``str.split``.

    Prints the elapsed time of each variant, then the ratio of the regex time
    to the ``str.split`` time.
    """
    iterations = 10000

    elapsed_regex = timeit.timeit(
        lambda: headers_parse_re(_TEST_HEADERS), number=iterations
    )
    print("regex", elapsed_regex)

    elapsed_simple = timeit.timeit(
        lambda: headers_parse_simple(_TEST_HEADERS), number=iterations
    )
    print("str.split", elapsed_simple)

    # A value above 1 means the str.split variant was faster.
    print("speedup", elapsed_regex / elapsed_simple)


class Benchmark(Enum):
    """Benchmarks that can be selected by name.

    Each member is declared as ``(arg_value, func)``. ``arg_value`` becomes the
    member's ``value``, so ``Benchmark("headers-split")`` looks a member up by
    that string; ``func`` is stored on the member as ``.func``.
    """

    def __new__(cls, arg_value: str, func: Callable[[], None]):
        # Set ``_value_`` explicitly so the enum value is only the name string
        # rather than the whole ``(arg_value, func)`` tuple.
        entry = object.__new__(cls)
        entry.func = func
        entry._value_ = arg_value
        return entry

    HEADERS_SPLIT = ("headers-split", run_headers_split)
    HEADERS_FULL = ("headers-full", run_headers_full)


def main():
    """Parse the command line and run the requested benchmark.

    The ``benchmark`` option selects a ``Benchmark`` member by value. If it
    matches no member, the supported names are printed and nothing is run.
    Otherwise the member's function is called ``num_runs`` times.
    """
    parse_command_line()

    requested = options.benchmark
    try:
        selected = Benchmark(requested)
    except ValueError:
        # Enum lookup by value raises ValueError for an unrecognised name.
        supported = ", ".join(member.value for member in Benchmark)
        print(
            "Unknown benchmark: '{}', supported values are: {}"
            .format(requested, supported)
        )
        return

    runner = selected.func
    for _ in range(options.num_runs):
        runner()


# Run the benchmark command-line entry point only when this file is executed
# as a script, not when it is imported.
if __name__ == '__main__':
    main()