1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
|
#!/usr/bin/env python
import re
import timeit
from enum import Enum
from typing import Callable
from tornado.httputil import HTTPHeaders
from tornado.options import define, options, parse_command_line
# Command-line options registered with tornado.options; main() reads them
# from `options` after calling parse_command_line().
# `benchmark` is looked up among the Benchmark enum values; no default is
# declared here.
define("benchmark", type=str)
# `num_runs` is how many times the selected benchmark function is called.
define("num_runs", type=int, default=1)
# Line separator pattern: a newline optionally preceded by a carriage return.
_CRLF_RE = re.compile(r"\r?\n")
# Sample header block fed to every benchmark. Each header line ends with
# "\r\n", so splitting on the line separator yields a trailing empty string.
_TEST_HEADERS = (
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,"
"image/apng,*/*;q=0.8,application/signed-exchange;v=b3\r\n"
"Accept-Encoding: gzip, deflate, br\r\n"
"Accept-Language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7\r\n"
"Cache-Control: max-age=0\r\n"
"Connection: keep-alive\r\n"
"Host: example.com\r\n"
"Upgrade-Insecure-Requests: 1\r\n"
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36\r\n"
)
def headers_split_re(headers: str) -> None:
    """Split *headers* with the ``_CRLF_RE`` pattern and walk every piece.

    This is a timing workload: the pieces are discarded and nothing is
    returned, so only the cost of the regex split and the iteration counts.
    """
    pieces = _CRLF_RE.split(headers)
    for _piece in pieces:
        pass
def headers_split_simple(headers: str) -> None:
    """Split *headers* on ``"\\n"`` and drop a trailing ``"\\r"`` per piece.

    This is a timing workload: the stripped pieces are discarded and nothing
    is returned, so only the cost of ``str.split`` plus the per-line
    carriage-return handling counts.
    """
    for piece in headers.split("\n"):
        if not piece.endswith("\r"):
            continue
        # The result is intentionally unused; the slice itself is the work
        # being measured.
        piece = piece[:-1]
def headers_parse_re(headers: str) -> HTTPHeaders:
    """Build an ``HTTPHeaders`` from *headers*, splitting with ``_CRLF_RE``.

    Every non-empty piece produced by the regex split is passed to
    ``HTTPHeaders.parse_line``; empty pieces are skipped.

    Returns the populated ``HTTPHeaders`` object.
    """
    parsed = HTTPHeaders()
    for piece in _CRLF_RE.split(headers):
        if not piece:
            continue
        parsed.parse_line(piece)
    return parsed
def headers_parse_simple(headers: str) -> HTTPHeaders:
    """Build an ``HTTPHeaders`` from *headers*, splitting with ``str.split``.

    The text is split on ``"\\n"``; a trailing ``"\\r"`` is removed from each
    piece, and every piece that is still non-empty is passed to
    ``HTTPHeaders.parse_line``.

    Returns the populated ``HTTPHeaders`` object.
    """
    parsed = HTTPHeaders()
    for piece in headers.split("\n"):
        if piece.endswith("\r"):
            piece = piece[:-1]
        if not piece:
            continue
        parsed.parse_line(piece)
    return parsed
def run_headers_split():
    """Time the regex split against the plain ``str.split`` and print results.

    Each variant is run 100000 times over ``_TEST_HEADERS``. The elapsed
    time of each is printed as soon as it is measured, followed by the ratio
    regex time / str.split time labelled "speedup".
    """
    iterations = 100000

    elapsed_regex = timeit.timeit(
        lambda: headers_split_re(_TEST_HEADERS), number=iterations
    )
    print("regex", elapsed_regex)

    elapsed_plain = timeit.timeit(
        lambda: headers_split_simple(_TEST_HEADERS), number=iterations
    )
    print("str.split", elapsed_plain)

    ratio = elapsed_regex / elapsed_plain
    print("speedup", ratio)
def run_headers_full():
    """Time full header parsing with both split strategies and print results.

    Each variant is run 10000 times over ``_TEST_HEADERS``. The elapsed time
    of each is printed as soon as it is measured, followed by the ratio
    regex time / str.split time labelled "speedup".
    """
    iterations = 10000

    elapsed_regex = timeit.timeit(
        lambda: headers_parse_re(_TEST_HEADERS), number=iterations
    )
    print("regex", elapsed_regex)

    elapsed_plain = timeit.timeit(
        lambda: headers_parse_simple(_TEST_HEADERS), number=iterations
    )
    print("str.split", elapsed_plain)

    ratio = elapsed_regex / elapsed_plain
    print("speedup", ratio)
class Benchmark(Enum):
    """Available benchmarks, keyed by their command-line name.

    Each member is declared as a ``(name, function)`` pair. The name becomes
    the member's value (so ``Benchmark("headers-split")`` resolves it), and
    the function is stored on the member as ``func``.
    """

    def __new__(cls, arg_value: str, func: Callable[[], None]):
        # Only the name is used as the enum value; the callable rides along
        # as a plain attribute on the member.
        instance = object.__new__(cls)
        instance._value_ = arg_value
        instance.func = func
        return instance

    HEADERS_SPLIT = ("headers-split", run_headers_split)
    HEADERS_FULL = ("headers-full", run_headers_full)
def main():
    """Parse the command line and run the requested benchmark.

    The ``benchmark`` option is looked up among the ``Benchmark`` values. If
    it does not match any of them, a message listing the supported values is
    printed and the function returns without running anything. Otherwise the
    benchmark function is called ``num_runs`` times.
    """
    parse_command_line()

    requested = options.benchmark
    try:
        selected = Benchmark(requested)
    except ValueError:
        supported = ", ".join(member.value for member in Benchmark)
        message = "Unknown benchmark: '{}', supported values are: {}".format(
            requested, supported
        )
        print(message)
        return

    runner = selected.func
    for _ in range(options.num_runs):
        runner()
# Run the benchmark only when this file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()
|