File: bench_html.py

package info (click to toggle)
firefox 143.0.3-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,617,328 kB
  • sloc: cpp: 7,478,492; javascript: 6,417,157; ansic: 3,720,058; python: 1,396,372; xml: 627,523; asm: 438,677; java: 186,156; sh: 63,477; makefile: 19,171; objc: 13,059; perl: 12,983; yacc: 4,583; cs: 3,846; pascal: 3,405; lex: 1,720; ruby: 1,003; exp: 762; php: 436; lisp: 258; awk: 247; sql: 66; sed: 53; csh: 10
file content (57 lines) | stat: -rw-r--r-- 1,570 bytes parent folder | download | duplicates (26)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import io
import os
import sys

import pyperf

# Put the parent of this script's directory at the front of sys.path so the
# html5lib import below is looked up there before any other location.
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")]
import html5lib  # noqa: E402


def bench_parse(fh, treebuilder):
    """Parse the document in *fh* once with the given html5lib tree builder.

    The stream is rewound first so every call reads from the beginning.
    The parsed tree is discarded and nothing is returned.
    """
    parse_options = {"treebuilder": treebuilder, "useChardet": False}
    fh.seek(0)
    html5lib.parse(fh, **parse_options)


def bench_serialize(loops, fh, treebuilder):
    """Time *loops* serializations of the document held in *fh*.

    The document is parsed once, before the clock starts, so only the
    repeated ``html5lib.serialize`` calls are measured.

    Returns the elapsed time as the difference between two
    ``pyperf.perf_counter()`` readings.
    """
    # Untimed setup: rewind the stream and build the tree to serialize.
    fh.seek(0)
    tree = html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)

    iterations = range(loops)
    started = pyperf.perf_counter()

    for _ in iterations:
        html5lib.serialize(
            tree,
            tree=treebuilder,
            encoding="ascii",
            inject_meta_charset=False,
        )

    return pyperf.perf_counter() - started


# Names accepted by the optional positional "benchmark" command-line argument;
# when that argument is omitted, every benchmark listed here is run.
BENCHMARKS = ["parse", "serialize"]


def add_cmdline_args(cmd, args):
    """Append the selected benchmark name to *cmd*, if one was selected.

    *cmd* is modified in place; nothing is returned. When
    ``args.benchmark`` is falsy, *cmd* is left untouched.
    """
    selected = args.benchmark
    if not selected:
        return
    cmd.append(selected)


if __name__ == "__main__":
    # Set up the pyperf runner and its optional positional "benchmark" argument.
    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
    runner.metadata["description"] = "Run benchmarks based on Anolis"
    runner.argparser.add_argument("benchmark", nargs="?", choices=BENCHMARKS)
    args = runner.parse_args()

    # A benchmark named on the command line narrows the run to that one;
    # otherwise every known benchmark is run.
    benchmarks = (args.benchmark,) if args.benchmark else BENCHMARKS

    # Read the sample document into memory once; the benchmark functions
    # rewind this buffer themselves before each use.
    data_path = os.path.join(os.path.dirname(__file__), "data", "html.html")
    with open(data_path, "rb") as html_file:
        source = io.BytesIO(html_file.read())

    treebuilders = ("etree", "dom", "lxml")

    if "parse" in benchmarks:
        for builder in treebuilders:
            runner.bench_func("html_parse_%s" % builder, bench_parse, source, builder)

    if "serialize" in benchmarks:
        for builder in treebuilders:
            runner.bench_time_func("html_serialize_%s" % builder, bench_serialize, source, builder)