1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
|
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
####################################################################################################
# Copyright (C) by the DBCSR developers group - All rights reserved #
# This file is part of the DBCSR library. #
# #
# For information on the license, see the LICENSE file. #
# For further information please visit https://dbcsr.cp2k.org #
# SPDX-License-Identifier: GPL-2.0+ #
####################################################################################################
import os
import random
import json
import argparse
def format_to_cpp(kernels):
    """Render a list of kernels as the rows of a C++ initializer list.

    Every kernel is a dictionary providing the keys "m", "n" and "k" (any other key is ignored). The kernels are
    ordered by their (m, n, k) triplet and each of them becomes one line of the form `{ m,  n,  k},`, so that the
    returned string can be placed between the braces of a C++ vector of vectors. The list passed in is left untouched
    and an empty list yields an empty string."""
    row_template = " {{{m:>2}, {n:>2}, {k:>2}}},\n"
    ordered = sorted(
        kernels, key=lambda kernel: (kernel["m"], kernel["n"], kernel["k"])
    )
    rows = [
        row_template.format(m=kernel["m"], n=kernel["n"], k=kernel["k"])
        for kernel in ordered
    ]
    return "".join(rows)
# ===============================================================================
def main(
    dbcsr_base_dir,
    libsmm_acc_base_dir,
    test_template_dir,
    test_output_dir,
    gpu_version,
    nsamples,
):
    """
    Generate a performance test of libsmm_acc in the form of a CUDA or HIP file, using libsmm_acc_timer_multiply.cpp.template as
    a template

    All the kernels marked as "autotuned" in the parameter file of the given GPU are written to the test, together
    with a random sample of at most `nsamples` of the remaining (predicted) kernels.

    :param dbcsr_base_dir: DBCSR base directory (not used by this function, kept so that callers need not change)
    :param libsmm_acc_base_dir: directory containing `parameters/parameters_<gpu_version>.json`
    :param test_template_dir: directory containing `libsmm_acc_timer_multiply.cpp.template`
    :param test_output_dir: directory in which `libsmm_acc_timer_multiply.cpp` is written
    :param gpu_version: GPU card version, selects the parameter file to read
    :param nsamples: maximum number of predicted kernels to sample (an integer, or a string representing one)
    :raises ValueError: if `nsamples` cannot be converted to an integer
    """
    # `nsamples` may arrive as a string (e.g. straight from the command line): convert it up front, since comparing a
    # string with the number of predicted kernels below would raise a TypeError
    nsamples = int(nsamples)

    # Read parameter file
    print("GPU version: {}".format(gpu_version))
    param_fn = os.path.join(
        libsmm_acc_base_dir, "parameters", "parameters_{}.json".format(gpu_version)
    )
    with open(param_fn, "r") as f:
        all_kernels = json.load(f)

    # Get the autotuned kernels to test
    autotuned_kernels = [k for k in all_kernels if k["source"] == "autotuned"]
    print("Found {:,} autotuned kernels".format(len(autotuned_kernels)))
    kernels_to_print_autotuned = format_to_cpp(autotuned_kernels)

    # Get the non-autotuned kernels to test
    predicted_kernels = [k for k in all_kernels if k["source"] != "autotuned"]
    print("Found {:,} predicted kernels".format(len(predicted_kernels)))
    if predicted_kernels:
        # Never ask for more samples than there are kernels to draw from
        nsamples = min(nsamples, len(predicted_kernels))
        kernels_to_test_predicted = random.sample(predicted_kernels, nsamples)
    else:
        kernels_to_test_predicted = []
    kernels_to_print_predicted = format_to_cpp(kernels_to_test_predicted)

    # Print to test file
    file_template = os.path.join(
        test_template_dir, "libsmm_acc_timer_multiply.cpp.template"
    )
    file_generate = os.path.join(test_output_dir, "libsmm_acc_timer_multiply.cpp")
    with open(file_template, "r") as f:
        test = f.read()
    # The placeholders of the template already carry their own indentation: drop the leading whitespace of the first
    # generated row
    test = test.replace(
        "[[AUTOTUNED_KERNELS_HERE]]", kernels_to_print_autotuned.lstrip()
    )
    test = test.replace(
        "[[PREDICTED_KERNELS_HERE]]", kernels_to_print_predicted.lstrip()
    )
    with open(file_generate, "w") as f:
        f.write(test)
    print(
        "Wrote {:,} test kernels to {}".format(
            len(autotuned_kernels) + len(kernels_to_test_predicted), file_generate
        )
    )
# ===============================================================================
# Command line entry point: parse the options, derive the folders to read from/write to and generate the test
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="""
        Generate a performance test of libsmm_acc in the form of a CUDA or HIP file, using
        libsmm_acc_timer_multiply.cpp.template as a template
        """,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "-f", "--base_dir", metavar="DBCSRHOME", default="", help="DBCSR base directory"
    )
    parser.add_argument(
        "-o",
        "--out_dir",
        metavar="OUTDIR",
        default="tests",
        help="Directory in which to write the generated test files. Expressed relatively to base_dir",
    )
    parser.add_argument(
        "-g",
        "--gpu_version",
        metavar="GPU_VERSION",
        default="P100",
        help="GPU card version, used to select the appropriate libsmm_acc parameters file",
    )
    parser.add_argument(
        "-n",
        "--nsamples",
        # Without an explicit type, a value given on the command line would be passed on as a string while the
        # default is an integer
        type=int,
        default=1000,
        help=(
            "Number of samples from the matrix sizes space 4 <= m,n,k <= 45 (except autotuned kernels)"
            " to sample for performance testing"
        ),
    )
    args = parser.parse_args()

    # Folders in/to which to read/write files
    libsmm_acc_base_dir = os.path.join(args.base_dir, "src/acc/libsmm_acc")
    test_template_dir = os.path.join(args.base_dir, "tests")
    test_output_dir = os.path.join(args.base_dir, args.out_dir)

    main(
        args.base_dir,
        libsmm_acc_base_dir,
        test_template_dir,
        test_output_dir,
        args.gpu_version,
        args.nsamples,
    )
|