#pragma once
#include <ATen/core/ivalue.h>
#include <torch/csrc/jit/codegen/cuda/fusion.h>
#include <torch/csrc/jit/codegen/cuda/scheduler/reduction_heuristic.h>
// TODO: If caching inputs would require persistence, we are sending the
// fusion to the persistent kernel scheduler. This isn't necessary if the only
// persistent buffers are inputs, as we could re-read them from global memory.
// Need to consider if this is worth implementing.
namespace torch {
namespace jit {
namespace fuser {
namespace cuda {
class SchedulerRuntimeInfo;
class HeuristicSummary;

// Compute heuristics for scheduling `fusion` as a persistent kernel, based on
// the concrete runtime inputs. An optional `data_cache` allows reusing
// previously computed analysis.
TORCH_CUDA_CU_API std::shared_ptr<ReductionParams> getPersistentHeuristics(
    Fusion* fusion,
    const at::ArrayRef<c10::IValue>& runtime_inputs,
    HeuristicSummary* data_cache = nullptr);

// Overload that takes already-populated SchedulerRuntimeInfo instead of the
// raw runtime inputs.
TORCH_CUDA_CU_API std::shared_ptr<ReductionParams> getPersistentHeuristics(
    Fusion* fusion,
    SchedulerRuntimeInfo& runtime_info,
    HeuristicSummary* data_cache = nullptr);

// Apply the given reduction parameters to transform `fusion` into a
// persistent kernel schedule.
TORCH_CUDA_CU_API void schedulePersistentKernel(
    Fusion* fusion,
    const ReductionParams& rparams);
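
// A minimal usage sketch, assuming `fusion` points to an already-constructed
// Fusion and `inputs` holds the runtime tensors as IValues (names are
// illustrative, error handling omitted):
//
//   std::vector<c10::IValue> inputs = ...;
//   auto rparams = getPersistentHeuristics(fusion, inputs);
//   if (rparams != nullptr) {
//     schedulePersistentKernel(fusion, *rparams);
//   }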
} // namespace cuda
} // namespace fuser
} // namespace jit
} // namespace torch