File: combine_plots.py

package info (click to toggle)
hipblas 5.5.1-4
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 14,352 kB
  • sloc: cpp: 114,952; f90: 26,193; python: 4,618; sh: 954; ansic: 628; makefile: 45; xml: 23
file content (186 lines) | stat: -rw-r--r-- 9,039 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env python3
import argparse
import os
import re
import sys
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

# Command-line interface of the script; the parsed result is kept in the module-level `args`.
parser = argparse.ArgumentParser(
    description=(
        'Used with completed datasets from performancereport.py to compare different functions\' performance.\n'
        'Works with hipblas-bench and hipBLAS\' performancereport.py. Can also be used with rocblas-bench and rocBLAS\' performancereport.py with '
        'command line arguments.'
    ),
    epilog='Example usage: ./combine_plots.py -s N -d ./output_gemm/run00 -d ./output_symm/run00',
)

# column of the data files used as the x axis
parser.add_argument(
    '-s', '--size_arg',
    dest='size_arg',
    default='N',
    help='Value which to base the x axis "size" on.',
)

# -d may be repeated; every occurrence is appended to args.dirs
parser.add_argument(
    '-d', '--dir',
    dest='dirs',
    action='append',
    required=True,
    help='Each directory containing data which you would like to compare.',
)

# -t may be repeated as well; the order is expected to follow the -d arguments
parser.add_argument(
    '-t', '--titles',
    dest='titles',
    action='append',
    help='The subtitle name for the resulting plot for each directory, in the same order as the arguments passed in --dir.',
)

parser.add_argument(
    '-f', '--savedir',
    dest='savedir',
    type=str,
    default='combine_plots',
    help='Directory where resulting plots will be saved.',
)

parser.add_argument(
    '-x', '--executable',
    dest='executable_name',
    type=str,
    default='hipblas-bench',
    help='Name of the executable used to run perf tests.',
)

parser.add_argument(
    '--search_str',
    dest='search_string',
    type=str,
    default='hipblas-Gflops',
    help='A search string to find data line in data files.',
)

args = parser.parse_args()

def get_all_files(directory_str):
    """
    collects the paths of all entries ending in .out which sit directly inside the given directory.

    Parameters:
        directory_str (string): the directory to scan (sub-directories are not descended into)
    Returns:
        ret_list (list[string]): directory_str joined with every entry name ending in .out,
                                 in the order reported by os.listdir
    """
    # the .out files are the only ones of interest: they carry the input parameters
    # (func name, precision) as well as the output parameters (gflops)
    entry_names = map(os.fsdecode, os.listdir(os.fsencode(directory_str)))
    return [os.path.join(directory_str, name) for name in entry_names if name.endswith(".out")]

def get_output_val_from_file(filename, output_param='hipblas-Gflops', gflops_str='hipblas-Gflops'):
    """
    parses through file and returns one value of the csv portion as given in the file.

    The first line containing output_param is taken as the csv header; the line right after it is
    taken as the matching data line. The value sitting in the column named gflops_str is returned.

    Parameters:
        filename (string): path of the file to parse.
        output_param (string): text which identifies the csv header line.
        gflops_str (string): name of the header column whose value is wanted.
    Returns:
        value (string): the value as listed in the file with surrounding whitespace removed, or '-1'
                        if the file does not exist, no line contains output_param, or the header
                        line is not followed by a data line.
    Raises:
        ValueError: if the header line has no column named gflops_str.
    """
    if not os.path.exists(filename):
        return '-1'

    # read everything up front so the file handle is released right away
    with open(filename, 'r') as data_file:
        lines = data_file.readlines()

    # pairing each line with its successor means a header on the very last line (no data line
    # after it) is simply never matched instead of indexing past the end of the list
    for header_line, data_line in zip(lines, lines[1:]):
        if output_param not in header_line:
            continue

        # strip each column name so the last column (which carries the newline) and columns
        # written after ", " can be looked up too
        columns = [name.strip() for name in header_line.split(",")]

        # split on commas, except on commas within parentheses such as "(1, 0)"
        values = re.split(r',\s*(?![^()]*\))', data_line.strip())
        return values[columns.index(gflops_str)].strip()

    return '-1'

def get_input_param_from_file(filename, input_param, executable_name = 'hipblas-bench'):
    """
    parses through file and returns the value which follows input_param on the command line recorded in the file.

    A line counts as a command line when it contains executable_name. The line is split on whitespace
    and the token right after the first occurrence of input_param is returned. Lines which mention the
    executable but do not carry input_param followed by a value are skipped.

    Parameters:
        filename (string): path of the file to parse.
        input_param (string): the input parameter which we are parsing the file for. For example, '-f' to parse function name.
        executable_name (string): executable name that we can use to find the command line in the data file
    Returns:
        value (string): the token following input_param, e.g. the function name for '-f'.
    Raises:
        RuntimeError: if the file does not exist or no line provides a value for input_param.
    """
    # a missing file is treated like a file without a match, so the caller always gets the
    # RuntimeError below rather than a failure on an unbound local
    lines = []
    if os.path.exists(filename):
        with open(filename, 'r') as data_file:
            lines = data_file.readlines()

    for line in lines:
        if executable_name not in line:
            continue

        tokens = line.split()
        # the flag has to be followed by a value, hence the last token is not a valid position for it
        if input_param in tokens[:-1]:
            return tokens[tokens.index(input_param) + 1]

    raise RuntimeError('Cannot find input param ' + input_param + ' in file: ' + filename)

def get_data_from_directories(directories, size_param = 'N', executable_name = 'hipblas-bench', search_string = 'hipblas-Gflops'):
    """
    For each directory in directories, gathers function name, precisions, sizes, and gflops from within files in that directory and returns it.

    Parameters:
        directories (list[string]): list of directory names to gather information from
        size_param (string): parameter in file which defines the "size"
        executable_name (string): executable name that we can use to parse the function name in the data file
        search_string (string): the string which we used to find the data line in the .out file; it is also tried
                                first as the name of the performance column
    Returns:
        res_dicts (list[dict{string: list[(int, float)]}]): for each directory; for each precision in any file within that directory, this dictionary contains a list
                                                            of tuples for that precision containing sizes (as defined by size_param) and the corresponding
                                                            gflops value from a file.
        res_funcs (list[string]): a list of function names, one function name is gathered from each directory. Each function name corresponds to dictionary
                                  at the same index in res_dicts. An entry is None when its directory holds no .out file.
    """
    res_dicts = []
    res_funcs = []
    for directory in directories:
        cur_funcname = None
        cur_dict = {}

        for f in get_all_files(directory):
            # append funcname to list of funcnames, only for first file in each directory as we assume
            # each directory has data for only one function (but multiple precisions)
            if cur_funcname is None:
                # executable_name has to be forwarded here as well, otherwise runs of any executable
                # other than the default one can never be parsed
                cur_funcname = get_input_param_from_file(f, '-f', executable_name)

            prec = get_input_param_from_file(f, '-r', executable_name)

            size = int(get_output_val_from_file(f, search_string, size_param))

            # the performance column is named after the search string for non-default executables
            # (e.g. a rocblas search string); if the search string is only a fragment of the header
            # and not a column of its own, fall back to the default column name as before
            try:
                perf = float(get_output_val_from_file(f, search_string, search_string))
            except ValueError:
                perf = float(get_output_val_from_file(f, search_string))

            # a tuple of (size, gflops) as gathered from the current file, grouped by precision
            cur_dict.setdefault(prec, []).append((size, perf))

        res_dicts.append(cur_dict)
        res_funcs.append(cur_funcname)

    return res_dicts, res_funcs

def plot_data(gflops_dicts, titles, savedir, size_arg = 'N'):
    """
    plots gflops data from dictionaries, one plot for each precision present in the first dictionary.
    Data sets which do not contain a given precision are left out of that precision's plot.

    Each figure is saved inside savedir (created when missing, taken relative to the current working
    directory) under the name '<title>_<title>_..._<precision>'; no file extension is appended here.
    The dictionaries passed in are not modified.

    Parameters:
        gflops_dicts (list[dict{string: list[(int, float)]}]): data as given by :func:`get_data_from_directories`.
        titles (list[string]): a list of titles for each data set to be plotted and used as a savefile name.
        savedir (string): directory where resulting plots will be saved.
        size_arg (string or list[string]): x axis title on plot; a list of names is joined with '='.
    """
    if len(gflops_dicts) == 0:
        return

    # a plain string is used as is: joining it would put '=' between its characters.
    # Only an actual sequence of names is joined (in case we add multiple params)
    x_label = size_arg if isinstance(size_arg, str) else '='.join(size_arg)

    for prec in gflops_dicts[0]:
        colors = iter(cm.rainbow(np.linspace(0, 1, len(gflops_dicts))))
        figure, axes = plt.subplots(figsize=(7,7))
        for gflops_dict, funcname in zip(gflops_dicts, titles):
            # taken before the precision check so a data set keeps the color of its position
            # even when an earlier data set is skipped
            cur_color = next(colors)
            if prec not in gflops_dict:
                continue

            # I prefer having a 0 at the bottom so the performance looks more accurate.
            # The origin goes into a new list so the caller's data stays untouched
            sorted_tuples = sorted(gflops_dict[prec] + [(0, 0)])
            sorted_sizes = [x[0] for x in sorted_tuples]
            sorted_gflops = [x[1] for x in sorted_tuples]

            axes.scatter(sorted_sizes, sorted_gflops, color=cur_color, label=funcname)
            # no color letter in the format string: the color keyword is the one that is meant
            axes.plot(sorted_sizes, sorted_gflops, '-o', color=cur_color)

        axes.set_xlabel(x_label)
        axes.set_ylabel('gflops')

        # magic numbers from performancereport.py to make plots look nice
        axes.legend(fontsize=10, bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left',
                    mode='expand', borderaxespad=0.)
        figure.tight_layout(rect=(0,0.05,1.0,1.0))

        filename = '_'.join(titles) + '_' + prec
        os.makedirs(savedir, exist_ok=True)
        figure.savefig(os.path.join(os.getcwd(), savedir, filename))

        # pyplot keeps a reference to every figure it created until the figure is closed
        plt.close(figure)

# Gather the measurements of every directory given with -d, along with the function name found in each.
gflops, funcnames = get_data_from_directories(
    args.dirs,
    args.size_arg,
    args.executable_name,
    args.search_string,
)

# Titles given with -t take the place of the parsed function names, but only as a complete set:
# one title per directory.
if args.titles:
    if len(args.titles) != len(gflops):
        raise RuntimeError('Must have same amount of -t parameters as -d parameters, or have no -t parameters.')
    funcnames = args.titles

plot_data(gflops, funcnames, args.savedir, args.size_arg)