File: benchmark.py

package info (click to toggle)
textual-fastdatatable 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 10,052 kB
  • sloc: python: 3,461; makefile: 29
file content (158 lines) | stat: -rw-r--r-- 5,152 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from __future__ import annotations

import gc
from pathlib import Path
from time import perf_counter

import pandas as pd
import polars as pl
from textual.app import App, ComposeResult
from textual.driver import Driver
from textual.pilot import Pilot
from textual.types import CSSPathType
from textual.widgets import DataTable as BuiltinDataTable

from textual_fastdatatable import ArrowBackend
from textual_fastdatatable import DataTable as FastDataTable
from textual_fastdatatable.backend import PolarsBackend

BENCHMARK_DATA = Path(__file__).parent.parent / "tests" / "data"


async def scroller(pilot: Pilot) -> None:
    """Drive the app through a fixed scroll script and exit with the timings.

    The time since ``pilot.app.start`` is sampled once before any key is
    pressed (reported as first paint) and once after the last key press
    (reported as total elapsed). The app is then asked to exit with the
    tuple ``(first_paint, elapsed)`` as its result.
    """
    app = pilot.app
    first_paint = perf_counter() - app.start  # type: ignore
    # Page down, move right across columns, then page down again.
    key_script = ["pagedown"] * 5 + ["right"] * 15 + ["pagedown"] * 5
    for key in key_script:
        await pilot.press(key)
    elapsed = perf_counter() - app.start  # type: ignore
    app.exit(result=(first_paint, elapsed))


class BuiltinApp(App):
    """Benchmark app that shows the data in Textual's built-in DataTable.

    The parquet file is read and turned into plain row tuples before the
    timer is started, so ``start`` marks the moment table construction
    begins.
    """

    TITLE = "Built-In DataTable"

    def __init__(
        self,
        data_path: Path,
        driver_class: type[Driver] | None = None,
        css_path: CSSPathType | None = None,
        watch_css: bool = False,
    ):
        """Forward the app options to ``App`` and remember ``data_path``."""
        super().__init__(
            driver_class=driver_class, css_path=css_path, watch_css=watch_css
        )
        self.data_path = data_path

    def compose(self) -> ComposeResult:
        """Fill a built-in DataTable one row at a time and yield it."""
        frame = pd.read_parquet(self.data_path)
        # name=None makes itertuples produce plain tuples directly.
        records = list(frame.itertuples(index=False, name=None))
        self.start = perf_counter()
        table: BuiltinDataTable = BuiltinDataTable()
        table.add_columns(*map(str, frame.columns))
        for record in records:
            table.add_row(*record, height=1, label=None)
        yield table


class ArrowBackendApp(App):
    """Benchmark app that hands the parquet path straight to FastDataTable.

    The timer is started before the table is constructed, so whatever the
    table does with the path during construction is included in the
    measurement.
    """

    TITLE = "FastDataTable (Arrow from Parquet)"

    def __init__(
        self,
        data_path: Path,
        driver_class: type[Driver] | None = None,
        css_path: CSSPathType | None = None,
        watch_css: bool = False,
    ):
        """Forward the app options to ``App`` and remember ``data_path``."""
        super().__init__(
            driver_class=driver_class, css_path=css_path, watch_css=watch_css
        )
        self.data_path = data_path

    def compose(self) -> ComposeResult:
        """Start the timer, then build and yield the table."""
        self.start = perf_counter()
        table = FastDataTable(data=self.data_path)
        yield table


class ArrowBackendAppFromRecords(App):
    """Benchmark app that feeds FastDataTable from in-memory row tuples.

    The parquet file is read and converted to row tuples before the timer
    starts; building the Arrow backend from those records and constructing
    the table happen after it.
    """

    TITLE = "FastDataTable (Arrow from Records)"

    def __init__(
        self,
        data_path: Path,
        driver_class: type[Driver] | None = None,
        css_path: CSSPathType | None = None,
        watch_css: bool = False,
    ):
        """Forward the app options to ``App`` and remember ``data_path``."""
        super().__init__(
            driver_class=driver_class, css_path=css_path, watch_css=watch_css
        )
        self.data_path = data_path

    def compose(self) -> ComposeResult:
        """Build an Arrow backend from the records and yield the table."""
        frame = pd.read_parquet(self.data_path)
        # name=None makes itertuples produce plain tuples directly.
        records = list(frame.itertuples(index=False, name=None))
        self.start = perf_counter()
        backend = ArrowBackend.from_records(records, has_header=False)
        labels = list(map(str, frame.columns))
        yield FastDataTable(backend=backend, column_labels=labels)


class PolarsBackendApp(App):
    """Benchmark app that backs FastDataTable with a Polars DataFrame.

    The timer is started first, so reading the parquet file with Polars and
    wrapping it in a ``PolarsBackend`` are both part of the measurement.
    """

    TITLE = "FastDataTable (Polars from Parquet)"

    def __init__(
        self,
        data_path: Path,
        driver_class: type[Driver] | None = None,
        css_path: CSSPathType | None = None,
        watch_css: bool = False,
    ):
        """Forward the app options to ``App`` and remember ``data_path``."""
        super().__init__(driver_class, css_path, watch_css)
        self.data_path = data_path

    def compose(self) -> ComposeResult:
        """Start the timer, load the data with Polars, and yield the table."""
        self.start = perf_counter()
        polars_backend = PolarsBackend.from_dataframe(
            pl.read_parquet(self.data_path)
        )
        # A ready-made backend is passed as ``backend=``, matching how the
        # Arrow-from-records app in this file hands over its backend.
        # NOTE(review): ``data=`` appears to be meant for raw inputs (paths,
        # tables, records) rather than backend instances - confirm against
        # the DataTable constructor.
        yield FastDataTable(backend=polars_backend)


if __name__ == "__main__":
    # Contenders, in the column order used by the results table.
    app_defs = [
        BuiltinApp,
        ArrowBackendApp,
        ArrowBackendAppFromRecords,
        PolarsBackendApp,
    ]

    # Each entry is (parquet file name, number of runs to average over).
    # The smaller lap_times files are run three times, everything else once.
    lap_sizes = [100, 1000, 10000, 100000, 538121]
    wide_sizes = [10000, 100000]
    bench = [
        (f"lap_times_{size}.parquet", 3 if size <= 10000 else 1)
        for size in lap_sizes
    ]
    bench += [(f"wide_{size}.parquet", 1) for size in wide_sizes]

    with open("results.md", "w") as report:
        # Markdown table header followed by its divider row.
        titles = " | ".join([a.TITLE for a in app_defs])  # type: ignore
        report.write("Records |" + titles + "\n")
        report.write("--------|" + "|".join(["--------"] * len(app_defs)) + "\n")

        for file_name, repeats in bench:
            # One list of (first_paint, elapsed) samples per app.
            samples_per_app: list[list[tuple[float, float]]] = []
            for app_cls in app_defs:
                samples: list[tuple[float, float]] = []
                for _ in range(repeats):
                    app = app_cls(BENCHMARK_DATA / file_name)
                    # Automatic collection is off while the app runs; a
                    # manual collection follows each run.
                    gc.disable()
                    paint, total = app.run(  # type: ignore
                        headless=True, auto_pilot=scroller
                    )
                    gc.collect()
                    samples.append((paint, total))
                samples_per_app.append(samples)
            gc.enable()

            cells = []
            for samples in samples_per_app:
                mean_paint = sum(sample[0] for sample in samples) / repeats
                mean_total = sum(sample[1] for sample in samples) / repeats
                cells.append(f"{mean_paint:7,.3f}s / {mean_total:7,.3f}s")
            report.write(file_name + " | " + " | ".join(cells) + "\n")