1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
import sys
import awkward as ak
import numpy as np
import cupy as cp
import time
from pathlib import Path
# Add current directory to path to import playground
sys.path.insert(0, str(Path(__file__).parent))
from playground import physics_analysis, physics_analysis_gpu, physics_analysis_cccl # noqa: E402
def generate_random_events(num_events=50000, seed=42):
"""
Generate random physics events with electrons and muons.
Args:
num_events: Number of events to generate
seed: Random seed for reproducibility
Returns:
Awkward Array with structure matching playground.py events
"""
np.random.seed(seed)
# Generate random counts for electrons and muons per event (0-10 each)
num_electrons_per_event = np.random.randint(0, 11, size=num_events)
num_muons_per_event = np.random.randint(0, 11, size=num_events)
total_electrons = np.sum(num_electrons_per_event)
total_muons = np.sum(num_muons_per_event)
# Generate random physics values for all electrons
electron_pts = np.random.uniform(10, 100, size=total_electrons)
electron_etas = np.random.uniform(-3, 3, size=total_electrons)
electron_phis = np.random.uniform(0, 2*np.pi, size=total_electrons)
# Generate random physics values for all muons
muon_pts = np.random.uniform(10, 100, size=total_muons)
muon_etas = np.random.uniform(-3, 3, size=total_muons)
muon_phis = np.random.uniform(0, 2*np.pi, size=total_muons)
# Build awkward arrays with jagged structure
electrons = ak.Array({
"pt": ak.unflatten(electron_pts, num_electrons_per_event),
"eta": ak.unflatten(electron_etas, num_electrons_per_event),
"phi": ak.unflatten(electron_phis, num_electrons_per_event),
})
muons = ak.Array({
"pt": ak.unflatten(muon_pts, num_muons_per_event),
"eta": ak.unflatten(muon_etas, num_muons_per_event),
"phi": ak.unflatten(muon_phis, num_muons_per_event),
})
events = ak.zip({"electrons": electrons, "muons": muons}, depth_limit=1)
print(f"Generated {num_events:,} events")
print(f" Total electrons: {total_electrons:,}")
print(f" Total muons: {total_muons:,}")
print(f" Avg electrons per event: {total_electrons/num_events:.2f}")
print(f" Avg muons per event: {total_muons/num_events:.2f}")
print()
return events
def benchmark_analysis(events):
"""
Benchmark the three analysis approaches with warmup runs.
Warmup runs are excluded from timing (only measure steady-state performance).
Args:
events: Awkward Array of events to analyze
"""
print("=" * 60)
print("BENCHMARKING PHYSICS ANALYSIS")
print("=" * 60)
print()
# Warmup and benchmark CPU version
print("Warming up physics_analysis (CPU)...")
_ = physics_analysis(events)
print("Running physics_analysis (CPU)...")
start = time.perf_counter()
result_cpu = physics_analysis(events)
time_cpu = time.perf_counter() - start
print(f" Time: {time_cpu:.4f} seconds")
print()
events_gpu = ak.to_backend(events, "cuda")
# Warmup and benchmark GPU native version
print("Warming up physics_analysis_gpu (GPU native)...")
_ = physics_analysis_gpu(events_gpu)
print("Running physics_analysis_gpu (GPU native)...")
start = time.perf_counter()
result_gpu = physics_analysis_gpu(events_gpu)
cp.cuda.Device().synchronize()
time_gpu = time.perf_counter() - start
print(f" Time: {time_gpu:.4f} seconds")
print()
# Warmup and benchmark CCCL version
print("Warming up physics_analysis_cccl (CCCL)...")
_ = physics_analysis_cccl(events_gpu)
print("Running physics_analysis_cccl (CCCL)...")
start = time.perf_counter()
result_cccl = physics_analysis_cccl(events_gpu)
cp.cuda.Device().synchronize()
time_cccl = time.perf_counter() - start
print(f" Time: {time_cccl:.4f} seconds")
print()
# Display summary
print("=" * 60)
print("RESULTS SUMMARY")
print("=" * 60)
print(f"CPU: {time_cpu:.4f} seconds (baseline)")
print(
f"GPU native: {time_gpu:.4f} seconds ({time_cpu/time_gpu:.2f}x speedup)")
print(
f"CCCL: {time_cccl:.4f} seconds ({time_cpu/time_cccl:.2f}x speedup)")
print()
# Print sample results to verify correctness
print("Sample results (first 5 events with 2 electrons):")
print(f" CPU electrons: {result_cpu['electron'][:5]}")
print(f" GPU electrons: {result_gpu['electron'][:5]}")
print(f" CCCL electrons: {result_cccl['electron'][:5]}")
print()
# Check correctness
print("Checking correctness...")
cp.testing.assert_allclose(
result_cpu['electron'], result_cccl['electron'])
print("Correctness check passed")
print()
if __name__ == "__main__":
# Generate random events at scale
events = generate_random_events(num_events=2**24)
# Run benchmarks
benchmark_analysis(events)
|