# mypy: allow-untyped-defs
from __future__ import annotations

import io
import logging
import os
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union

import torch._inductor.config
import torch.fx

if TYPE_CHECKING:
    from torch._inductor.utils import InputType
    from torch.export import ExportedProgram

__all__ = [
    "compile",
    "list_mode_options",
    "list_options",
    "cudagraph_mark_step_begin",
]

log = logging.getLogger(__name__)


def compile(
    gm: torch.fx.GraphModule,
    example_inputs: List[InputType],
    options: Optional[Dict[str, Any]] = None,
):
    """
    Compile a given FX graph with TorchInductor. This allows compiling
    FX graphs captured without using TorchDynamo.

    Args:
        gm: The FX graph to compile.
        example_inputs: List of tensor inputs.
        options: Optional dict of config options. See `torch._inductor.config`.

    Returns:
        Callable with same behavior as gm but faster.
    """
    from .compile_fx import compile_fx

    return compile_fx(gm, example_inputs, config_patches=options)
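

# A minimal usage sketch for `compile` on an FX graph captured without TorchDynamo
# (kept as a comment so it does not run at import time; `MyModule` and the input
# shapes are illustrative placeholders, not part of this module):
#
#     import torch
#
#     class MyModule(torch.nn.Module):
#         def forward(self, x):
#             return torch.relu(x) + 1.0
#
#     gm = torch.fx.symbolic_trace(MyModule())            # FX capture, no TorchDynamo
#     compiled = torch._inductor.compile(gm, [torch.randn(8, 8)])
#     out = compiled(torch.randn(8, 8))                   # behaves like gm, per the docstring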


def aoti_compile_and_package(
    exported_program: ExportedProgram,
    _deprecated_unused_args=None,
    _deprecated_unused_kwargs=None,
    *,
    package_path: Optional[Union[str, io.BytesIO]] = None,
    inductor_configs: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Compiles the exported program with AOTInductor, and packages it into a .pt2
    artifact specified by the input package_path. To load the package, you can
    call `torch._inductor.aoti_load_package(package_path)`.

    To compile and save multiple models into a single .pt2 artifact, you can do
    the following:

    ```
    ep1 = torch.export.export(M1(), ...)
    aoti_file1 = torch._inductor.aot_compile(ep1, ...)
    ep2 = torch.export.export(M2(), ...)
    aoti_file2 = torch._inductor.aot_compile(ep2, ...)

    from torch._inductor.package import package_aoti, load_package

    package_aoti("my_package.pt2", {"model1": aoti_file1, "model2": aoti_file2})

    compiled_model1 = load_package("my_package.pt2", "model1")
    compiled_model2 = load_package("my_package.pt2", "model2")
    ```

    Args:
        exported_program: An exported program created through a call from torch.export
        package_path: Optional specified path to the generated .pt2 artifact.
        inductor_configs: Optional dictionary of configs to control inductor.

    Returns:
        Path to the generated artifact
    """
    from torch.export import ExportedProgram

    from .debug import aot_inductor_minifier_wrapper

    if not isinstance(exported_program, ExportedProgram):
        raise ValueError("Only ExportedProgram is supported")

    if exported_program.example_inputs is None:
        raise RuntimeError(
            "exported_program.example_inputs is required to be set in order "
            "for AOTInductor compilation."
        )

    if _deprecated_unused_args is not None or _deprecated_unused_kwargs is not None:
        log.warning(
            "You no longer need to specify args/kwargs to aoti_compile_and_package "
            "as we can get this information from exported_program.example_inputs."
        )

    assert (
        package_path is None
        or isinstance(package_path, io.BytesIO)
        or (isinstance(package_path, str) and package_path.endswith(".pt2"))
    ), f"Expect package path to be a file ending in .pt2, is None, or is a buffer. Instead got {package_path}"

    inductor_configs = inductor_configs or {}
    inductor_configs["aot_inductor.package"] = True

    if inductor_configs.get("aot_inductor.output_path"):
        raise RuntimeError(
            "Please pass in a package path to aoti_compile_and_package() instead "
            "of setting the aot_inductor.output_path config."
        )

    # a wrapper around aoti_compile_and_package_inner.
    return aot_inductor_minifier_wrapper(
        _aoti_compile_and_package_inner,
        exported_program,
        package_path=package_path,
        inductor_configs=inductor_configs,
    )
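

# A minimal end-to-end sketch for a single model: export, compile-and-package,
# then load the .pt2 artifact (kept as a comment so it does not run at import
# time; `MyModel`, the shapes, and the output path are illustrative placeholders):
#
#     import torch
#
#     class MyModel(torch.nn.Module):
#         def forward(self, x):
#             return x * 2
#
#     ep = torch.export.export(MyModel(), (torch.randn(4, 4),))
#     pt2_path = torch._inductor.aoti_compile_and_package(
#         ep, package_path="my_model.pt2"
#     )
#     compiled = torch._inductor.aoti_load_package(pt2_path)
#     out = compiled(torch.randn(4, 4))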


def _aoti_compile_and_package_inner(
    gm: torch.nn.Module,
    # flat_example_inputs: List[Any],
    args: Tuple[Any],
    kwargs: Optional[Dict[str, Any]] = None,
    *,
    load_and_run: bool = False,
    package_path: Optional[Union[str, io.BytesIO]] = None,
    inductor_configs: Optional[Dict[str, Any]] = None,
):
    """
    See docstring for aoti_compile_and_package.

    If `load_and_run` is True, this function will load the compiled model and run it.
    This is for the minifier to check the correctness of the compiled model.
    """
    from .package import package_aoti

    assert isinstance(gm, torch.fx.GraphModule)

    kwargs = kwargs or {}

    aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)
    assert isinstance(aoti_files, list)

    if package_path is None:
        # Default the package path to the basename of the generated .so,
        # falling back to the generated .cpp when no .so was produced.
        path = [
            os.path.splitext(file)[0]
            for file in aoti_files
            if os.path.splitext(file)[1] == ".so"
        ]
        if len(path) == 0:
            path = [
                os.path.splitext(file)[0]
                for file in aoti_files
                if os.path.splitext(file)[1] == ".cpp"
            ]
        package_path = path[0] + ".pt2"

    res = package_aoti(package_path, aoti_files)
    assert res == package_path

    if load_and_run:
        compiled_model = aoti_load_package(package_path)
        aoti_result = compiled_model(*args, **kwargs)
    return package_path


def aoti_load_package(path: Union[str, io.BytesIO]) -> Any:  # type: ignore[type-arg]
    """
    Loads the model from the PT2 package.

    If multiple models were packaged into the PT2, this will load the default
    model. To load a specific model, you can directly call the load API

    ```
    from torch._inductor.package import load_package

    compiled_model1 = load_package("my_package.pt2", "model1")
    compiled_model2 = load_package("my_package.pt2", "model2")
    ```

    Args:
        path: Path to the .pt2 package
    """
    from torch._inductor.package import load_package

    return load_package(path)
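

# Per the signature above, `path` may also be an in-memory buffer rather than a
# filename; a sketch (assuming a package was previously written to "my_model.pt2"):
#
#     import io
#
#     with open("my_model.pt2", "rb") as f:
#         buffer = io.BytesIO(f.read())
#     compiled = torch._inductor.aoti_load_package(buffer)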


def aot_compile(
    gm: torch.fx.GraphModule,
    args: Tuple[Any],
    kwargs: Optional[Dict[str, Any]] = None,
    *,
    options: Optional[Dict[str, Any]] = None,
) -> Union[str, List[str]]:
    """
    Ahead-of-time compile a given FX graph with TorchInductor into a shared library.

    Args:
        gm: The FX graph to compile.
        args: Example arguments
        kwargs: Example keyword arguments
        options: Optional dict of config options. See `torch._inductor.config`.

    Returns:
        Path to the generated shared library, or a list of files generated by
        AOTI if aot_inductor.package=True.
        TODO: make it return a list by default
    """
    from .compile_fx import _aoti_flatten_inputs, compile_fx_aot

    flat_example_inputs, options = _aoti_flatten_inputs(
        gm, args, kwargs, options=options
    )

    return compile_fx_aot(
        gm,
        flat_example_inputs,  # type: ignore[arg-type]
        config_patches=options,
    )
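

# A minimal sketch of ahead-of-time compilation of an FX graph (kept as a comment;
# `MyModule` and the inputs are illustrative placeholders). The return value is the
# path to the generated shared library unless aot_inductor.package=True is set in
# `options`, in which case a list of generated files is returned:
#
#     gm = torch.fx.symbolic_trace(MyModule())
#     so_path = torch._inductor.aot_compile(gm, (torch.randn(4, 4),))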


def list_mode_options(
    mode: Optional[str] = None, dynamic: Optional[bool] = None
) -> Dict[str, Any]:
    r"""Returns a dictionary describing the optimizations that each of the available
    modes passed to `torch.compile()` performs.

    Args:
        mode (str, optional): The mode to return the optimizations for.
            If None, returns optimizations for all modes
        dynamic (bool, optional): Whether dynamic shape is enabled.

    Example::
        >>> torch._inductor.list_mode_options()
    """

    mode_options: Dict[str, Dict[str, bool]] = {
        "default": {},
        # enable cudagraphs
        "reduce-overhead": {
            "triton.cudagraphs": True,
        },
        # enable max-autotune
        "max-autotune-no-cudagraphs": {
            "max_autotune": True,
            "coordinate_descent_tuning": True,
        },
        # enable max-autotune
        # enable cudagraphs
        "max-autotune": {
            "max_autotune": True,
            "triton.cudagraphs": True,
            "coordinate_descent_tuning": True,
        },
    }
    return mode_options[mode] if mode else mode_options  # type: ignore[return-value]
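

# For reference, the mode names above are the strings accepted by
# `torch.compile(mode=...)`; a sketch relating the two (`my_model` is a placeholder):
#
#     opts = torch._inductor.list_mode_options("max-autotune")
#     # -> {"max_autotune": True, "triton.cudagraphs": True, "coordinate_descent_tuning": True}
#     fast_a = torch.compile(my_model, mode="max-autotune")
#     fast_b = torch.compile(my_model, options=opts)  # roughly the same inductor configuration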


def list_options() -> List[str]:
    r"""Returns a list of the names of the optimization and debug configuration
    options that are available to `torch.compile()`.

    The options are documented in `torch._inductor.config`.

    Example::
        >>> torch._inductor.list_options()
    """

    from torch._inductor import config

    current_config: Dict[str, Any] = config.get_config_copy()

    return list(current_config.keys())
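

# A small sketch of using the returned names to inspect current configuration values
# (the "autotune" filter is just an example):
#
#     from torch._inductor import config
#
#     names = torch._inductor.list_options()
#     autotune_names = [n for n in names if "autotune" in n]
#     current = config.get_config_copy()
#     print({n: current[n] for n in autotune_names})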


def cudagraph_mark_step_begin():
    "Indicates that a new iteration of inference or training is about to begin."
    from .cudagraph_trees import mark_step_begin

    mark_step_begin()
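

# A minimal sketch of when to call cudagraph_mark_step_begin, assuming a model
# compiled with the cudagraph-enabled "reduce-overhead" mode (`my_model` and
# `batches` are placeholders):
#
#     compiled = torch.compile(my_model, mode="reduce-overhead")
#     for batch in batches:
#         torch._inductor.cudagraph_mark_step_begin()  # mark the start of a new iteration
#         out = compiled(batch)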