1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
|
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import pandas as pd
from collections import defaultdict, OrderedDict
def _compute_gpu_projection_df(nvtx_df, cuda_df, cuda_nvtx_index_map):
    """Build the table of NVTX ranges projected onto GPU time.

    Each NVTX index is associated with the minimum "gpu_start" and the
    maximum "gpu_end" of the CUDA operations that the corresponding NVTX
    range encloses.

    Args:
        nvtx_df: DataFrame with a "text" column, looked up by the NVTX index
            labels found in the values of `cuda_nvtx_index_map`.
        cuda_df: DataFrame whose rows expose "gpu_start" and "gpu_end".
        cuda_nvtx_index_map: Mapping from a `cuda_df` index label to the
            collection of NVTX index labels enclosing that operation. Rows
            of `cuda_df` that are absent from the mapping are ignored.

    Returns:
        A DataFrame with the columns "text", "start" and "end", holding one
        row per NVTX index that was reached through the mapping.
    """
    earliest_gpu_start = OrderedDict()
    latest_gpu_end = OrderedDict()

    for op in cuda_df.itertuples():
        # An operation without an entry in the mapping contributes nothing.
        for nvtx_index in cuda_nvtx_index_map.get(op.Index, ()):
            if nvtx_index in earliest_gpu_start:
                # Widen the projected range so it also covers this operation.
                earliest_gpu_start[nvtx_index] = min(
                    earliest_gpu_start[nvtx_index], op.gpu_start
                )
                latest_gpu_end[nvtx_index] = max(
                    latest_gpu_end[nvtx_index], op.gpu_end
                )
            else:
                # First operation seen for this range: seed both bounds.
                earliest_gpu_start[nvtx_index] = op.gpu_start
                latest_gpu_end[nvtx_index] = op.gpu_end

    projected_columns = {
        "text": nvtx_df.loc[latest_gpu_end.keys(), "text"],
        "start": earliest_gpu_start,
        "end": latest_gpu_end,
    }
    return pd.DataFrame(projected_columns)
def _find_cuda_nvtx_ranges(nvtx_df, cuda_df):
    """Map each CUDA operation to the NVTX ranges that enclose it.

    Three time-sorted streams are swept together: NVTX range starts, NVTX
    range ends and CUDA operations (ordered by their "start"). A range is
    active from the moment its start is consumed until its end is consumed,
    and every CUDA operation is recorded with a snapshot of the ranges that
    are active when it is reached.

    Args:
        nvtx_df: DataFrame with "start" and "end" columns. Its index labels
            identify the NVTX ranges.
        cuda_df: DataFrame with "start" and "end" columns. Its index labels
            identify the CUDA operations.

    Returns:
        A dict mapping a CUDA index label to the set of NVTX index labels
        enclosing that operation. Operations with no enclosing range are
        omitted. An empty dict is returned when either input has no rows.
    """
    # Nothing can be matched without at least one row on each side. This also
    # keeps the priming next() calls below from raising StopIteration.
    if len(nvtx_df) == 0 or len(cuda_df) == 0:
        return {}

    cuda_time_df = pd.DataFrame(
        data={"start": cuda_df["start"], "end": cuda_df["end"]}
    ).sort_values("start")
    nvtx_start_df = pd.DataFrame(data={"time": nvtx_df["start"]}).sort_values("time")
    nvtx_end_df = pd.DataFrame(data={"time": nvtx_df["end"]}).sort_values("time")

    cuda_iter = iter(cuda_time_df.itertuples())
    nvtx_start_iter = iter(nvtx_start_df.itertuples())
    nvtx_end_iter = iter(nvtx_end_df.itertuples())

    cuda_row = next(cuda_iter)
    nvtx_start_row = next(nvtx_start_iter)
    nvtx_end_row = next(nvtx_end_iter)

    cuda_nvtx_index_map = {}
    nvtx_active_indices = set()
    # Ranges whose end was consumed before their start, i.e. ranges that begin
    # after the current CUDA operation starts and finish by the time it ends.
    # They must never become active once their start is finally consumed.
    # NOTE(review): like the rest of the sweep, this relies on CUDA end times
    # not decreasing as the start times increase - confirm against callers.
    nvtx_ended_early = set()

    while True:
        if (
            nvtx_start_row is not None
            and nvtx_start_row.time <= nvtx_end_row.time
            and nvtx_start_row.time <= cuda_row.start
        ):
            # The next range starts no later than the current operation.
            if nvtx_start_row.Index in nvtx_ended_early:
                nvtx_ended_early.remove(nvtx_start_row.Index)
            else:
                nvtx_active_indices.add(nvtx_start_row.Index)
            # None marks the start stream as exhausted.
            nvtx_start_row = next(nvtx_start_iter, None)
        elif nvtx_end_row.time <= cuda_row.start or nvtx_end_row.time <= cuda_row.end:
            # The next range ends before the current operation has finished,
            # so it cannot enclose it. (The second comparison subsumes the
            # first whenever cuda_row.start <= cuda_row.end.)
            if nvtx_end_row.Index in nvtx_active_indices:
                nvtx_active_indices.remove(nvtx_end_row.Index)
            else:
                # Previously a KeyError: the start has not been consumed yet.
                nvtx_ended_early.add(nvtx_end_row.Index)
            try:
                nvtx_end_row = next(nvtx_end_iter)
            except StopIteration:
                # Every range has ended, so no later operation is enclosed.
                break
        else:
            # Every remaining active range encloses the current operation.
            if nvtx_active_indices:
                cuda_nvtx_index_map[cuda_row.Index] = nvtx_active_indices.copy()
            try:
                cuda_row = next(cuda_iter)
            except StopIteration:
                break

    return cuda_nvtx_index_map
def project_nvtx_onto_gpu(nvtx_df, cuda_df):
    """Project the NVTX ranges from the CPU onto the GPU.

    The projected range will have the start timestamp of the first enclosed
    GPU operation and the end timestamp of the last enclosed GPU operation.

    Both inputs are grouped by their "globalTid" column and matched group by
    group. Groups of NVTX ranges whose "globalTid" has no CUDA rows are
    skipped.
    """
    # Keep only complete ranges (both timestamps present) with a null
    # "endGlobalTid", i.e. drop ranges that end on a different thread.
    keep_mask = (
        nvtx_df["start"].notnull()
        & nvtx_df["end"].notnull()
        & nvtx_df["endGlobalTid"].isnull()
    )
    filtered_nvtx_df = nvtx_df[keep_mask]

    nvtx_by_tid = filtered_nvtx_df.groupby("globalTid")
    cuda_by_tid = cuda_df.groupby("globalTid")

    cuda_nvtx_index_map = {}
    for global_tid, nvtx_tid_df in nvtx_by_tid:
        if global_tid in cuda_by_tid.groups:
            cuda_nvtx_index_map.update(
                _find_cuda_nvtx_ranges(
                    nvtx_tid_df, cuda_by_tid.get_group(global_tid)
                )
            )

    return _compute_gpu_projection_df(filtered_nvtx_df, cuda_df, cuda_nvtx_index_map)
|