- # mypy: allow-untyped-defs
- import getpass
- import inspect
- import os
- import re
- import sys
- import tempfile
- from os.path import abspath, dirname
- from typing import Any, Callable, Dict, Optional, Set, Type, TYPE_CHECKING, Union
- import torch
- def is_fbcode():
-     return not hasattr(torch.version, "git_version")
- # to configure logging for dynamo, aot, and inductor
- # use the following API in the torch._logging module
- # torch._logging.set_logs(dynamo=<level>, aot=<level>, inductor=<level>)
- # or use the environment variable TORCH_LOGS="dynamo,aot,inductor" (use a prefix + to indicate higher verbosity)
- # see this design doc for more detailed info
- # Design doc: https://docs.google.com/document/d/1ZRfTWKa8eaPq1AxaiHrq4ASTPouzzlPiuquSBEJYwS8/edit#
- # the name of a file to write the logs to
- # [@compile_ignored: debug]
- log_file_name: Optional[str] = None
- # [@compile_ignored: debug] Verbose will print full stack traces on warnings and errors
- verbose = os.environ.get("TORCHDYNAMO_VERBOSE", "0") == "1"
- # [@compile_ignored: runtime_behaviour] verify the correctness of optimized backend
- verify_correctness = False
- # need this many ops to create an FX graph
- minimum_call_count = 1
- # turn on/off DCE pass
- dead_code_elimination = True
- # disable (for a function) when cache reaches this size
- # controls the maximum number of cache entries with a guard on same ID_MATCH'd
- # object. It also controls the maximum size of cache entries if they don't have
- # any ID_MATCH'd guards.
- # [@compile_ignored: runtime_behaviour]
- cache_size_limit = 8
- # [@compile_ignored: runtime_behaviour] safeguarding to prevent horrible recomps
- accumulated_cache_size_limit = 256
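- # Example (illustrative sketch, not part of this module): if a hot function keeps hitting the
- # cache limit and falling back to eager, the limit can be raised via the config module:
- #   import torch
- #   torch._dynamo.config.cache_size_limit = 64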
- # whether or not to specialize on int inputs. This only has an effect with
- # dynamic_shapes; when dynamic_shapes is False, we ALWAYS specialize on int
- # inputs. Note that assume_static_by_default will also cause ints to get
- # specialized, so this is mostly useful for export, where we want inputs
- # to be dynamic, but accesses to ints should NOT get promoted into inputs.
- specialize_int = False
- # Whether or not to specialize on float inputs. Dynamo will always promote
- # float inputs into Tensor inputs, but at the moment, backends inconsistently
- # support codegen on float (this is to be fixed).
- specialize_float = True
- # legacy config, does nothing now!
- dynamic_shapes = True
- use_lazy_graph_module = (
- os.environ.get("TORCH_COMPILE_USE_LAZY_GRAPH_MODULE", "1") == "1"
- )
- # This is a temporary flag, which changes the behavior of dynamic_shapes=True.
- # When assume_static_by_default is True, we only allocate symbols for shapes marked dynamic via mark_dynamic.
- # NOTE - this flag can be removed once we can run dynamic_shapes=False w/ the mark_dynamic API
- # see [Note - on the state of mark_dynamic]
- assume_static_by_default = True
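- # Example (illustrative; `fn` is a placeholder function): with assume_static_by_default=True,
- # a dimension stays dynamic on the first compile only if it is explicitly marked:
- #   x = torch.randn(8, 16)
- #   torch._dynamo.mark_dynamic(x, 0)  # dim 0 gets a symbolic size instead of specializing to 8
- #   torch.compile(fn)(x)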
- # This flag changes how dynamic_shapes=True works, and is meant to be used in conjunction
- # with assume_static_by_default=True.
- # With this flag enabled, we always compile a frame as fully static for the first time, and, if we fail
- # any guards due to wobbles in shape, we recompile with *all* the wobbled shapes as being marked dynamic.
- automatic_dynamic_shapes = True
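- # Example (illustrative; `fn` is a placeholder): the first call compiles fully static; the
- # second call, with a different batch size, triggers one recompile with that dim made dynamic:
- #   compiled = torch.compile(fn)
- #   compiled(torch.randn(4, 16))
- #   compiled(torch.randn(8, 16))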
- # This flag changes how the shapes of parameters are treated.
- # If this flag is set to True, then the shapes of torch.nn.Parameter are assumed to be static,
- # while the shapes of torch.Tensor inputs follow the usual dynamic-shape rules.
- # If this flag is set to False, then the shapes of torch.nn.Parameter, like those of
- # torch.Tensor, are allowed to be dynamic.
- force_parameter_static_shapes = True
- # This flag controls whether the shapes of tensors stored as nn.Module properties are assumed to be static
- # If the flag is set to True, then the shapes of nn.Module properties are assumed to be static
- # If the flag is set to False, then the shapes of nn.Module properties can be dynamic
- force_nn_module_property_static_shapes = True
- # Typically, if you mark_dynamic a dimension, we will error if the dimension
- # actually ends up getting specialized. This knob changes the behavior so
- # that we don't error at all. This is helpful for CI, where a heuristic is
- # used to mark batch dimensions as dynamic and the heuristic may get it
- # wrong.
- allow_ignore_mark_dynamic = False
- # Set this to False to assume nn.Modules() contents are immutable (similar assumption as freezing)
- guard_nn_modules = False if is_fbcode() else True
- # Uses CPython internal dictionary tags to detect mutation. There is some
- # overlap between guard_nn_modules_using_dict_tags and guard_nn_modules flag.
- # guard_nn_modules unspecializes the nn module instance and adds guard for each
- # relevant member of the nn modules. On the other hand,
- # guard_nn_modules_using_dict_tags specializes on each nn module instance but
- # uses low overhead dict version matching to detect mutations, obviating the
- # need to guard on members of the nn modules. With
- # guard_nn_modules_using_dict_tags, the guard_nn_modules is not really required
- # but kept around for debugging and discussing unspecializing nn module
- # variables.
- # TODO(janimesh, voz): Remove both of these flags (or at least guard_nn_modules)
- # once guard_nn_modules_using_dict_tags has reached stability.
- guard_nn_modules_using_dict_tags = True
- # This feature doesn't really work. We offer this flag for experimental
- # purposes / if you want to help us build out support.
- #
- # torchdynamo has limited support for tensor subclasses that implement
- # __torch_function__; see [Note: __torch_function__] in torch_function.py.
- # Our current support is limited to tensor subclasses
- # that DO NOT store metadata on the tensor (in general, dynamo does not
- # support Python code that stores extra attributes on tensors at present).
- # If your tensor subclass purely changes function call behavior via
- # __torch_function__, you can allow torchdynamo to trace into it by
- # adding it to traceable_tensor_subclasses. We don't do any safety checks,
- # so it is up to you to ensure that your subclass is well behaved. See also
- # https://github.com/pytorch/torchdynamo/issues/1948
- #
- # We do NOT currently support __torch_dispatch__. The implementation is
- # currently buggy, the main show stopper for nontrivial use is
- # https://github.com/pytorch/torchdynamo/issues/1952
- traceable_tensor_subclasses: Set[Type[Any]] = set()
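- # Example (illustrative; `PassthroughTensor` is a hypothetical __torch_function__-only subclass
- # that stores no extra metadata on the tensor):
- #   class PassthroughTensor(torch.Tensor):
- #       @classmethod
- #       def __torch_function__(cls, func, types, args=(), kwargs=None):
- #           # purely changes call behavior; no attributes are stored on the tensor
- #           return super().__torch_function__(func, types, args, kwargs or {})
- #   torch._dynamo.config.traceable_tensor_subclasses.add(PassthroughTensor)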
- # Suppress errors in torch._dynamo.optimize, instead forcing a fallback to eager.
- # This is a good way to get your model to work one way or another, but you may
- # lose optimization opportunities this way. Devs, if your benchmark model is failing
- # this way, you should figure out why instead of suppressing it.
- suppress_errors = bool(os.environ.get("TORCHDYNAMO_SUPPRESS_ERRORS", False))
- # Record and write an execution record of the current frame to a file
- # if an exception is encountered
- # [@compile_ignored: debug]
- replay_record_enabled = os.environ.get("TORCH_COMPILE_REPLAY_RECORD", "0") == "1"
- # Rewrite assert statement in python with torch._assert
- rewrite_assert_with_torch_assert = True
- # Disable dynamo
- disable = os.environ.get("TORCH_COMPILE_DISABLE", False)
- # [@compile_ignored: runtime_behaviour] Get a cprofile trace of Dynamo
- cprofile = os.environ.get("TORCH_COMPILE_CPROFILE", False)
- # legacy config, does nothing now!
- skipfiles_inline_module_allowlist: Dict[Any, Any] = {}
- # If a string representing a PyTorch module is in this ignorelist,
- # the `allowed_functions.is_allowed` function will not consider it
- # when creating a list of PyTorch functions that will appear in
- # FX IR.
- allowed_functions_module_string_ignorelist = {
- "torch.distributions",
- "torch.testing",
- "torch._refs",
- "torch._prims",
- "torch._decomp",
- }
- # Debug Flag to try minifier at different stages. Possible values are {None, "aot", "dynamo"}
- # None - Minifier is switched off
- # dynamo - Runs minifier on the TorchDynamo produced graphs, if compilation fails
- # aot - Runs minifier on the Aot Autograd produced graphs, if compilation fails
- # [@compile_ignored: debug]
- repro_after = os.environ.get("TORCHDYNAMO_REPRO_AFTER", None)
- # Compiler compilation debug info
- # 1: Dumps the original graph out to repro.py if compilation fails
- # 2: Dumps a minifier_launcher.py if compilation fails.
- # 3: Always dumps a minifier_launcher.py. Good for segfaults.
- # 4: Dumps a minifier_launcher.py if the accuracy fails.
- # [@compile_ignored: debug]
- repro_level = int(os.environ.get("TORCHDYNAMO_REPRO_LEVEL", 2))
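- # Example (illustrative): to dump a minifier_launcher.py when AOT/Inductor compilation fails,
- # set these before running the failing script (the values shown are the common choice):
- #   torch._dynamo.config.repro_after = "aot"
- #   torch._dynamo.config.repro_level = 2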
- # By default, we try to detect accuracy failure by running both forward
- # and backward of a torchdynamo produced graph (if you are using repro_after
- # 'dynamo'). This setting forces us to only test the forward graph and
- # not the backward graph. This can be helpful if you're trying to debug
- # an inference only problem, but the minifier seems to be choking on the
- # backwards step
- # TODO: Detect this situation automatically so the user doesn't need
- # to manually configure this
- # [@compile_ignored: debug]
- repro_forward_only = os.environ.get("TORCHDYNAMO_REPRO_FORWARD_ONLY") == "1"
- # The tolerance we should use when testing if a compiled graph
- # has diverged so that we should treat it as an accuracy failure
- # [@compile_ignored: debug]
- repro_tolerance = 1e-3
- # Whether to ignore non-floating point values when checking accuracy.
- # Checking accuracy of non-floating point values such as boolean tensors
- # can lead to false positives.
- # [@compile_ignored: debug]
- repro_ignore_non_fp = os.environ.get("TORCHDYNAMO_REPRO_IGNORE_NON_FP") == "1"
- # If True, when testing if two models are the same, we will test them against
- # a third fp64 reference and only report a problem if the RMSE relative to the
- # fp64 is greater. However, this will use more memory; you may disable this
- # if memory usage is too high.
- # [@compile_ignored: runtime_behaviour]
- same_two_models_use_fp64 = True
- # Not all backends support scalars. Some calls on torch.Tensor (like .item()) return a scalar type.
- # When this flag is set to False, we introduce a graph break instead of capturing.
- # This requires dynamic_shapes to be True.
- capture_scalar_outputs = os.environ.get("TORCHDYNAMO_CAPTURE_SCALAR_OUTPUTS") == "1"
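- # Example (illustrative sketch): with the flag on, .item() is captured as an unbacked
- # symbolic value instead of causing a graph break:
- #   torch._dynamo.config.capture_scalar_outputs = True
- #   @torch.compile(fullgraph=True)
- #   def scale_by_sum(x):
- #       return x * x.sum().item()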
- # Not all backends support operators that have dynamic output shape (e.g.,
- # nonzero, unique). When this flag is set to False, we introduce a graph
- # break instead of capturing. This requires dynamic_shapes to be True.
- # If you set this to True, you probably also want capture_scalar_outputs
- # (these are separated for historical reasons).
- capture_dynamic_output_shape_ops = (
- os.environ.get("TORCHDYNAMO_CAPTURE_DYNAMIC_OUTPUT_SHAPE_OPS", "0") == "1"
- )
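- # Example (illustrative sketch): with both capture flags on, data-dependent ops such as
- # boolean-mask indexing (which lowers to nonzero) can stay in a single graph:
- #   torch._dynamo.config.capture_dynamic_output_shape_ops = True
- #   @torch.compile(fullgraph=True)
- #   def positives(x):
- #       return x[x > 0]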
- # Hybrid backed/unbacked symints: when True, prefer emitting deferred runtime asserts
- # instead of installing guards on symbolic shapes.
- prefer_deferred_runtime_asserts_over_guards = False
- # For complex dynamic shapes guards that we're unable to specify with dynamo/export's
- # range constraints + dims + derived dims language, we raise constraint violation
- # errors or specialize by default. If set to True, this flag avoids crashing/specialization,
- # and allows complex guards as runtime assertions in the graph.
- _allow_complex_guards_as_runtime_asserts = False
- # By default, dynamo will treat all ints as backed SymInts, which means (1) it
- # will wait to see the int change over multiple runs before generalizing and
- # (2) it will still always 0/1 specialize an int. When true, this knob
- # forces dynamo to treat _length_per_key and _offset_per_key on
- # KeyedJaggedTensor from torchrec as size-like unbacked SymInts, so that
- # they (1) generalize immediately and (2) unsoundly never compare equal to
- # 0/1. This is not on by default as AOTAutograd/Inductor cannot currently
- # compile this code; however, this can be useful for export.
- force_unspec_int_unbacked_size_like_on_torchrec_kjt = False
- # Should almost always be True in prod. When set to False, this relaxes the requirement
- # that cond's true_fn and false_fn produce code with identical guards.
- enforce_cond_guards_match = True
- # Specify how to optimize a compiled DDP module. The flag accepts a boolean
- # value or a string. There are 4 modes.
- # 1. "ddp_optimizer" (or True): with "ddp_ptimizer", Dynamo will automatically
- # split model graph into pieces to match DDP bucket sizes to allow DDP
- # comm/compute overlap.
- # 2. "python_reducer" (experimental): this optimization requires the usage
- # of compiled_autograd. With "python_reducer", DDP will disable the C++ reducer
- # and use the Python reducer to allow compiled_autograd to trace the
- # communication and allow comm/compute overlap without graph-breaks.
- # 3. "python_reducer_without_compiled_forward" (experimental): this mode is
- # similar to "python_reducer". One should only use this optimization mode
- # when compiled_autograd is used but the DDP module is not compiled.
- # 4. "no_optimization" (or False): Dynamo won't split the model graph, nor
- # will Python reducer be used. With this mode, there will be no graph-breaks
- # and the original DDP C++ reducer will be used. There will be no comm/compute
- # overlap. This mode CANNOT be used with compiled_autograd.
- # Note that to avoid breaking existing usage, mode 1 and mode 4 can also be
- # specified with a boolean value: True means "ddp_optimizer" and False means
- # "no_optimization".
- optimize_ddp: Union[bool, str] = True
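- # Example (illustrative; `model` is a placeholder nn.Module already set up for DDP):
- #   torch._dynamo.config.optimize_ddp = "ddp_optimizer"   # or True
- #   ddp_model = torch.nn.parallel.DistributedDataParallel(model)
- #   compiled = torch.compile(ddp_model)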
- # By default, Dynamo emits runtime asserts (e.g. torch._check, torch._check_is_size) in the graph.
- # In some cases those asserts can be costly at runtime;
- # e.g. torch._check(tensor[0].item() > 2) for a tensor on CUDA requires a CUDA sync.
- # Setting this to True keeps the asserts as hints for the symbolic shapes engine,
- # but they are not emitted in the graph.
- do_not_emit_runtime_asserts: bool = (
- os.environ.get("TORCH_DYNAMO_DO_NOT_EMIT_RUNTIME_ASSERTS", "0") == "1"
- )
- _ddp_optimization_mode = [
- "ddp_optimizer",
- "python_reducer", # experimental mode
- "python_reducer_without_compiled_forward", # experimental mode
- "no_optimization",
- ]
- def _get_optimize_ddp_mode():
-     m = sys.modules[__name__]
-     if isinstance(m.optimize_ddp, bool):
-         if m.optimize_ddp:
-             mode = "ddp_optimizer"
-         else:
-             mode = "no_optimization"
-     elif isinstance(m.optimize_ddp, str):
-         mode = m.optimize_ddp
-     else:
-         raise ValueError(f"Invalid type, {type(m.optimize_ddp)=}")
-     assert mode in m._ddp_optimization_mode, f"Invalid mode {mode=}"
-     return mode
- # If True, delays DDPOptimizer submodule compilation to 1st run of the model,
- # so that real tensor strides are used in all submodules
- # (instead of using FakeTensor strides, which can differ from real tensor strides and cause errors in some cases).
- # This feature is not hardened yet and is known to cause issues for some models, so it is False by default.
- optimize_ddp_lazy_compile = False
- # Whether to skip guarding on FSDP-managed modules
- skip_fsdp_guards = True
- # Whether to apply torch._dynamo.disable() to per-param FSDP hooks
- skip_fsdp_hooks = False
- # Make dynamo skip guarding on hooks on nn modules
- # Note: unsafe: if your model actually has hooks and you remove them, or doesn't and you add them,
- # dynamo will not notice and will execute whichever version you first compiled.
- skip_nnmodule_hook_guards = True
- # If True, raises exception if TorchDynamo is called with a context manager
- raise_on_ctx_manager_usage = True
- # If True, raise when aot autograd is unsafe to use
- raise_on_unsafe_aot_autograd = False
- # If true, error if you torch.jit.trace over a dynamo-optimized function.
- # If false, silently suppress dynamo
- error_on_nested_jit_trace = True
- # If true, error with a better message if we symbolically trace over a
- # dynamo-optimized function. If false, silently suppress dynamo.
- error_on_nested_fx_trace = True
- # Disables graph breaking on rnn. YMMV with backends.
- allow_rnn = False
- # If true, error if we try to recompile a function that has
- # already been compiled (i.e., error on recompilation).
- # [@compile_ignored: runtime_behaviour]
- error_on_recompile = False
- # [@compile_ignored: debug] Whether to report any guard failures (deprecated: does not do anything)
- report_guard_failures = True
- # [@compile_ignored: debug] root folder of the project
- base_dir = dirname(dirname(dirname(abspath(__file__))))
- # Trace through NumPy or graphbreak
- trace_numpy = True
- # Trace through torch.distributed code
- trace_distributed = False
- # Default NumPy dtypes when tracing with torch.compile
- # We default to 64-bit dtypes. For efficiency, one may want to change these to float32
- numpy_default_float = "float64"
- numpy_default_complex = "complex128"
- numpy_default_int = "int64"
- # use numpy's PRNG if True, pytorch otherwise
- use_numpy_random_stream = False
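- # Example (illustrative sketch; `x` may be a NumPy array or a Tensor): with trace_numpy=True,
- # NumPy calls inside a compiled function are traced through torch rather than graph-breaking:
- #   import numpy as np
- #   @torch.compile
- #   def np_fn(x):
- #       return np.sin(x) + 1.0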
- # Use C++ guard manager
- enable_cpp_guard_manager = os.environ.get("TORCHDYNAMO_CPP_GUARD_MANAGER", "1") == "1"
- # Inline inbuilt nn modules
- inline_inbuilt_nn_modules = (
- os.environ.get("TORCHDYNAMO_INLINE_INBUILT_NN_MODULES", "0") == "1"
- )
- def default_debug_dir_root():
-     # [@compile_ignored: debug]
-     DEBUG_DIR_VAR_NAME = "TORCH_COMPILE_DEBUG_DIR"
-     if DEBUG_DIR_VAR_NAME in os.environ:
-         return os.path.join(os.environ[DEBUG_DIR_VAR_NAME], "torch_compile_debug")
-     elif is_fbcode():
-         return os.path.join(
-             tempfile.gettempdir(), getpass.getuser(), "torch_compile_debug"
-         )
-     else:
-         return os.path.join(os.getcwd(), "torch_compile_debug")
- # [@compile_ignored: debug]
- debug_dir_root = default_debug_dir_root()
- # [@compile_ignored: debug]
- _save_config_ignore = {
- "repro_after",
- "repro_level",
- # workaround: "cannot pickle PyCapsule"
- "constant_functions",
- # workaround: "cannot pickle module"
- "skipfiles_inline_module_allowlist",
- }
- # for backend="cudagraphs", mutations on input be sent to the cudagraph backend
- # or replayed in aot_autograd epilogue. default is False because mutation on inputs
- # can prevent cudagraphing.
- cudagraph_backend_keep_input_mutation = False
- # enable cudagraph support for mutated inputs from prior cudagraph pool
- cudagraph_backend_support_input_mutation = False
- # When True, only ops that have the torch.Tag.pt2_compliant tag
- # will be allowed into the graph; all other ops will be disallowed
- # and will fall back to eager-mode PyTorch. Useful to ensure
- # correctness of custom ops.
- only_allow_pt2_compliant_ops = False
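- # Example (illustrative; "mylib::foo" is a hypothetical custom op, and the tags= argument
- # may require a recent torch.library version):
- #   torch.library.define("mylib::foo", "(Tensor x) -> Tensor", tags=(torch.Tag.pt2_compliant_tag,))
- #   torch._dynamo.config.only_allow_pt2_compliant_ops = True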
- # enable/disable dynamo tracing into torch.autograd.Function subclasses
- capture_autograd_function = True
- # enable/disable dynamo tracing for `torch.func` transforms
- capture_func_transforms = True
- # Whether to log Dynamo compilation metrics into log files (for OSS) and Scuba tables (for fbcode).
- log_compilation_metrics = True
- # A set of logging functions which will be reordered to the end of graph breaks,
- # allowing dynamo to construct larger graphs. Note that there are some
- # limitations to this, such as how it does not correctly print objects that were
- # mutated after the print statement.
- reorderable_logging_functions: Set[Callable[[Any], None]] = set()
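- # Example (illustrative): allow built-in print calls to be reordered past graph breaks:
- #   torch._dynamo.config.reorderable_logging_functions.add(print)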
- # simulates what would happen if we didn't have support for BUILD_SET opcode,
- # used for testing
- inject_BUILD_SET_unimplemented_TESTING_ONLY = False
- _autograd_backward_strict_mode_banned_ops = [
- "stride",
- "requires_grad",
- "storage_offset",
- "layout",
- "data",
- ]
- _autograd_backward_strict_mode_banned_ops.extend(
- [name for name, _ in inspect.getmembers(torch.Tensor) if re.match(r"^is_.*", name)]
- )
- # Enables caching of dispatches to fake tensors.
- fake_tensor_cache_enabled = (
- os.environ.get("TORCH_FAKE_TENSOR_DISPATCH_CACHE", "1") == "1"
- )
- # Enables cross checking between the fake tensor cache and dispatch.
- fake_tensor_cache_crosscheck_enabled = (
- os.environ.get("TORCH_FAKE_TENSOR_DISPATCH_CACHE_CROSSCHECK", "0") == "1"
- )
- # Whether to support `context_fn` in the torch.utils.checkpoint.checkpoint API under torch.compile().
- # WARNING: this is an experimental flag and is subject to change.
- _experimental_support_context_fn_in_torch_utils_checkpoint = False
- # Enables the Compiled Autograd engine to trace .backward() calls made under torch.compile().
- # Note: AOT Autograd will still trace joint graphs.
- compiled_autograd = False
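- # Example (illustrative sketch; `model` and `inp` are placeholders, and the context-manager
- # API lives in torch._dynamo.compiled_autograd rather than this flag itself):
- #   with torch._dynamo.compiled_autograd.enable(torch.compile):
- #       loss = torch.compile(model)(inp).sum()
- #       loss.backward()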
- if TYPE_CHECKING:
-     from torch.utils._config_typing import *  # noqa: F401, F403
-     def _make_closure_patcher(**changes):
-         ...
- from torch.utils._config_module import install_config_module
- install_config_module(sys.modules[__name__])