File: benchmark

package info (click to toggle)
python-s3transfer 0.14.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,464 kB
  • sloc: python: 15,756; makefile: 9
file content (138 lines) | stat: -rwxr-xr-x 4,296 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python
"""
Use for benchmarking performance of other scripts. Provides data about
time, memory use, cpu usage, network in, network out about the script ran in
the form of a csv.


Usage
=====

NOTE: Make sure you run ``pip install -r requirements-dev.txt`` before running.

To use the script, run::

    ./benchmark "./my-script-to-run"


If no ``--output-file`` was provided, the data will be saved to
``performance.csv``
"""

import argparse
import os
import subprocess
import sys
import time

import psutil

# Network IO is sampled from a single, platform-specific interface. Only
# Linux and macOS are handled; anything else is rejected up front.
if not (sys.platform.startswith('linux') or sys.platform == 'darwin'):
    # TODO: Add support for windows. This would require figuring out what
    # interface to use on windows.
    raise RuntimeError(f'Script cannot be run on {sys.platform}')

# 'en0' on macOS, 'eth0' on Linux.
INTERFACE = 'en0' if sys.platform == 'darwin' else 'eth0'


def benchmark(args):
    """Launch ``args.script`` and record its resource usage until it exits.

    :param args: Parsed command-line namespace providing ``script``,
        ``output_file`` and ``data_interval``.
    :returns: Whatever ``run_benchmark`` returns on normal completion, or
        ``1`` if the run was interrupted with Ctrl-C.
    """
    parent_pid = os.getpid()
    child_p = run_script(args)
    try:
        # Benchmark the process where the script is being run.
        return run_benchmark(child_p.pid, args.output_file, args.data_interval)
    except KeyboardInterrupt:
        # If there is an interrupt, then try to clean everything up: every
        # descendant of this benchmark process is asked to terminate.
        proc = psutil.Process(parent_pid)
        procs = proc.children(recursive=True)

        for child in procs:
            try:
                child.terminate()
            except psutil.NoSuchProcess:
                # The child already exited on its own; nothing to clean up,
                # and the remaining children must still be signalled.
                pass

        # Give the children one second to exit, then force-kill stragglers.
        _gone, alive = psutil.wait_procs(procs, timeout=1)
        for child in alive:
            try:
                child.kill()
            except psutil.NoSuchProcess:
                # Exited between wait_procs() returning and the kill.
                pass
        return 1


def run_script(args):
    """Start ``args.script`` through the shell without waiting for it.

    :param args: Namespace whose ``script`` attribute is the command line
        to execute.
    :returns: The ``subprocess.Popen`` handle of the started process.
    """
    command = args.script
    process = subprocess.Popen(command, shell=True)
    return process


def run_benchmark(pid, output_file, data_interval):
    """Poll a running process and write one CSV row per sample.

    Each row written to ``output_file`` has the form
    ``time,memory,cpu,sent_rate,recv_rate``: the ``time.time()`` timestamp of
    the sample, the resident set size and ``cpu_percent()`` reading that
    psutil reports for the measured process, and the bytes sent/received on
    ``INTERFACE`` per second since the previous sample. No header row is
    written.

    :param pid: PID of the process running the script.
    :param output_file: Path of the CSV file to write (truncated first).
    :param data_interval: Seconds to sleep between samples.
    :returns: ``0`` once the process has finished or can no longer be
        sampled.
    """
    p = psutil.Process(pid)
    previous_net = psutil.net_io_counters(pernic=True)[INTERFACE]
    # The interval used for the rates comes from a monotonic clock so that
    # wall-clock adjustments cannot yield a zero or negative interval.
    previous_tick = time.monotonic()
    process_to_measure = None

    with open(output_file, 'w') as f:
        while p.is_running():
            if p.status() == psutil.STATUS_ZOMBIE:
                # The script has exited but is_running() still reports it.
                p.kill()
                break
            time.sleep(data_interval)
            try:
                candidate = _get_underlying_python_process(p)
                # children() hands back fresh Process objects on every call,
                # and cpu_percent() measures since the previous call on the
                # *same* object. Keep the earlier instance while it still
                # refers to the same process so the reading is meaningful.
                if candidate != process_to_measure:
                    process_to_measure = candidate
                # Collect the memory and cpu usage.
                memory_used = process_to_measure.memory_info().rss
                cpu_percent = process_to_measure.cpu_percent()
                current_net = psutil.net_io_counters(pernic=True)[INTERFACE]
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                # The process exited (NoSuchProcess, which also covers
                # ZombieProcess) or can no longer be inspected, so stop
                # sampling instead of crashing with a traceback.
                break

            # Collect data on the in/out network io.
            sent_delta = current_net.bytes_sent - previous_net.bytes_sent
            recv_delta = current_net.bytes_recv - previous_net.bytes_recv

            # Determine the elapsed time to compute the network io rate. The
            # wall-clock time is only used as the row's timestamp.
            current_time = time.time()
            current_tick = time.monotonic()
            dt = current_tick - previous_tick
            previous_tick = current_tick
            previous_net = current_net
            sent_rate = sent_delta / dt
            recv_rate = recv_delta / dt

            # Save all of the data into a CSV file, flushing each row so the
            # file is usable even if the benchmark is interrupted.
            f.write(
                f"{current_time},{memory_used},{cpu_percent},"
                f"{sent_rate},{recv_rate}\n"
            )
            f.flush()
    return 0


def _get_underlying_python_process(process):
    """Return the process whose resource usage should be sampled.

    Some scripts, such as the streaming CLI commands, run nested under a
    shell script, in which case ``process`` itself is not the python process
    doing the work. The first descendant whose name contains ``python``
    (case-insensitively) is returned; if there is none, ``process`` is
    returned unchanged.
    """
    python_descendants = (
        descendant
        for descendant in process.children(recursive=True)
        if 'python' in descendant.name().lower()
    )
    return next(python_descendants, process)


def main():
    """Parse the command line and run the benchmark.

    :returns: The status code produced by ``benchmark``.
    """
    cli = argparse.ArgumentParser(usage=__doc__)

    # (flags, keyword settings) for every argument, in registration order.
    argument_specs = (
        (
            ('script',),
            {'help': 'The script to run for benchmarking'},
        ),
        (
            ('--data-interval',),
            {
                'default': 1,
                'type': float,
                'help': 'The interval in seconds to poll for data points',
            },
        ),
        (
            ('--output-file',),
            {
                'default': 'performance.csv',
                'help': 'The file to output the data collected to',
            },
        ),
    )
    for flags, settings in argument_specs:
        cli.add_argument(*flags, **settings)

    parsed = cli.parse_args()
    return benchmark(parsed)


# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == '__main__':
    sys.exit(main())