# mypy: allow-untyped-defs
import getpass
import inspect
import os
import re
import sys
import tempfile
from os.path import abspath, dirname
from typing import Any, Callable, Dict, Optional, Set, Type, TYPE_CHECKING, Union

import torch


def is_fbcode():
    return not hasattr(torch.version, "git_version")

# To configure logging for dynamo, aot, and inductor,
# use the following API in the torch._logging module:
# torch._logging.set_logs(dynamo=<level>, aot=<level>, inductor=<level>)
# or use the environment variable TORCH_LOGS="dynamo,aot,inductor" (use a prefix + to indicate higher verbosity)
# See this design doc for more detailed info
# Design doc: https://docs.google.com/document/d/1ZRfTWKa8eaPq1AxaiHrq4ASTPouzzlPiuquSBEJYwS8/edit#

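# Illustrative usage of the logging API described above (a sketch, not part of
# this config module; it assumes the torch._logging.set_logs signature shown in
# the comment):
#
#     import logging
#     import torch
#
#     torch._logging.set_logs(dynamo=logging.INFO, aot=logging.INFO, inductor=logging.INFO)
#
# or, from the shell (the "+" prefix raises a component to DEBUG verbosity):
#
#     TORCH_LOGS="+dynamo,aot,inductor" python my_script.py
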
# the name of a file to write the logs to
# [@compile_ignored: debug]
log_file_name: Optional[str] = None

# [@compile_ignored: debug] Verbose will print full stack traces on warnings and errors
verbose = os.environ.get("TORCHDYNAMO_VERBOSE", "0") == "1"

# [@compile_ignored: runtime_behaviour] verify the correctness of optimized backend
verify_correctness = False

# need this many ops to create an FX graph
minimum_call_count = 1

# turn on/off DCE pass
dead_code_elimination = True

# disable (for a function) when cache reaches this size
# controls the maximum number of cache entries with a guard on same ID_MATCH'd
# object. It also controls the maximum size of cache entries if they don't have
# any ID_MATCH'd guards.
# [@compile_ignored: runtime_behaviour]
cache_size_limit = 8

# [@compile_ignored: runtime_behaviour] safeguarding to prevent horrible recomps
accumulated_cache_size_limit = 256

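# Illustrative way to adjust the cache limits (a sketch; these are plain module
# attributes on torch._dynamo.config, so they can be set directly or patched
# for a scoped block):
#
#     import torch
#
#     torch._dynamo.config.cache_size_limit = 64
#
#     # or temporarily, via the config patcher installed on this module:
#     with torch._dynamo.config.patch(cache_size_limit=64):
#         opt_fn = torch.compile(fn)
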
# whether or not to specialize on int inputs. This only has an effect with
# dynamic_shapes; when dynamic_shapes is False, we ALWAYS specialize on int
# inputs. Note that assume_static_by_default will also cause ints to get
# specialized, so this is mostly useful for export, where we want inputs
# to be dynamic, but accesses to ints should NOT get promoted into inputs.
specialize_int = False

# Whether or not to specialize on float inputs. Dynamo will always promote
# float inputs into Tensor inputs, but at the moment, backends inconsistently
# support codegen on float (this is to be fixed).
specialize_float = True

# legacy config, does nothing now!
dynamic_shapes = True

use_lazy_graph_module = (
    os.environ.get("TORCH_COMPILE_USE_LAZY_GRAPH_MODULE", "1") == "1"
)

# This is a temporary flag, which changes the behavior of dynamic_shapes=True.
# When assume_static_by_default is True, we only allocate symbols for shapes marked dynamic via mark_dynamic.
# NOTE - this flag can be removed once we can run dynamic_shapes=False w/ the mark_dynamic API
# see [Note - on the state of mark_dynamic]
assume_static_by_default = True

# This flag changes how dynamic_shapes=True works, and is meant to be used in conjunction
# with assume_static_by_default=True.
# With this flag enabled, we always compile a frame as fully static the first time, and, if we fail
# any guards due to wobbles in shape, we recompile with *all* of the wobbled shapes marked as dynamic.
automatic_dynamic_shapes = True

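# Illustrative behavior of automatic_dynamic_shapes (a sketch; the function and
# the shapes are made up for the example):
#
#     import torch
#
#     @torch.compile
#     def f(x):
#         return x * 2
#
#     f(torch.randn(8, 8))    # first call: compiled with fully static shapes
#     f(torch.randn(16, 8))   # shape "wobble" on dim 0: recompiles with that
#                             # dimension treated as dynamic
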
# This flag changes how the shapes of parameters are treated.
# If this flag is set to True, then Dynamo attempts to treat the shapes of torch.nn.Parameter,
# as well as those of torch.Tensor, as dynamic.
# If this flag is set to False, then the shapes of torch.nn.Parameter are assumed to be static,
# while the shapes of torch.Tensor are assumed to be dynamic.
force_parameter_static_shapes = True

# This flag ensures that the shapes of an nn module are always assumed to be static.
# If the flag is set to True, then the shapes of an nn.module are assumed to be static.
# If the flag is set to False, then the shapes of an nn.module can be dynamic.
force_nn_module_property_static_shapes = True

# Typically, if you mark_dynamic a dimension, we will error if the dimension
# actually ends up getting specialized. This knob changes the behavior so
# that we don't error at all. This is helpful for our CI where I'm using a
# heuristic to mark batch dimensions as dynamic and the heuristic may get it
# wrong.
allow_ignore_mark_dynamic = False

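# Illustrative use of mark_dynamic, referenced above (a sketch; the tensor, the
# dimension index, and `fn` are made up for the example):
#
#     import torch
#
#     x = torch.randn(8, 128)
#     torch._dynamo.mark_dynamic(x, 0)   # ask Dynamo to treat dim 0 as dynamic
#     opt_fn = torch.compile(fn)
#     opt_fn(x)  # with allow_ignore_mark_dynamic=False, specializing dim 0 errors
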
# Set this to False to assume nn.Modules() contents are immutable (similar assumption as freezing)
guard_nn_modules = False if is_fbcode() else True

# Uses CPython internal dictionary tags to detect mutation. There is some
# overlap between guard_nn_modules_using_dict_tags and the guard_nn_modules flag.
# guard_nn_modules unspecializes the nn module instance and adds guards for each
# relevant member of the nn modules. On the other hand,
# guard_nn_modules_using_dict_tags specializes on each nn module instance but
# uses low overhead dict version matching to detect mutations, obviating the
# need to guard on members of the nn modules. With
# guard_nn_modules_using_dict_tags, guard_nn_modules is not really required,
# but it is kept around for debugging and discussing unspecializing nn module
# variables.
# TODO(janimesh, voz): Remove both of these flags (or at least guard_nn_modules)
# once we have reached stability for guard_nn_modules_using_dict_tags.
guard_nn_modules_using_dict_tags = True

# This feature doesn't really work. We offer this flag for experimental
# purposes / if you want to help us build out support.
#
# torchdynamo has limited support for tensor subclasses that implement
# __torch_function__; see [Note: __torch_function__] in torch_function.py.
# Our current support is limited to tensor subclasses
# that DO NOT store metadata on the tensor (in general, dynamo does not
# support Python code that stores extra attributes on tensors at present).
# If your tensor subclass purely changes function call behavior via
# __torch_function__, you can allow torchdynamo to trace into it by
# adding it to traceable_tensor_subclasses. We don't do any safety checks,
# so it is up to you to ensure that your subclass is well behaved. See also
# https://github.com/pytorch/torchdynamo/issues/1948
#
# We do NOT currently support __torch_dispatch__. The implementation is
# currently buggy, the main show stopper for nontrivial use is
# https://github.com/pytorch/torchdynamo/issues/1952
traceable_tensor_subclasses: Set[Type[Any]] = set()

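# Illustrative registration of a __torch_function__-only subclass (a sketch;
# MyWrapperTensor is a made-up name that satisfies the constraints above: it
# only changes call behavior and stores no extra metadata on the tensor):
#
#     import torch
#
#     class MyWrapperTensor(torch.Tensor):
#         @classmethod
#         def __torch_function__(cls, func, types, args=(), kwargs=None):
#             kwargs = kwargs or {}
#             return super().__torch_function__(func, types, args, kwargs)
#
#     torch._dynamo.config.traceable_tensor_subclasses.add(MyWrapperTensor)
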
# Suppress errors in torch._dynamo.optimize, instead forcing a fallback to eager.
# This is a good way to get your model to work one way or another, but you may
# lose optimization opportunities this way. Devs, if your benchmark model is failing
# this way, you should figure out why instead of suppressing it.
suppress_errors = bool(os.environ.get("TORCHDYNAMO_SUPPRESS_ERRORS", False))

# Record and write an execution record of the current frame to a file
# if an exception is encountered
# [@compile_ignored: debug]
replay_record_enabled = os.environ.get("TORCH_COMPILE_REPLAY_RECORD", "0") == "1"

# Rewrite assert statements in Python with torch._assert
rewrite_assert_with_torch_assert = True

# Disable dynamo
disable = os.environ.get("TORCH_COMPILE_DISABLE", False)

# [@compile_ignored: runtime_behaviour] Get a cprofile trace of Dynamo
cprofile = os.environ.get("TORCH_COMPILE_CPROFILE", False)

# legacy config, does nothing now!
skipfiles_inline_module_allowlist: Dict[Any, Any] = {}

# If a string representing a PyTorch module is in this ignorelist,
# the `allowed_functions.is_allowed` function will not consider it
# when creating a list of PyTorch functions that will appear in
# FX IR.
allowed_functions_module_string_ignorelist = {
    "torch.distributions",
    "torch.testing",
    "torch._refs",
    "torch._prims",
    "torch._decomp",
}

# Debug Flag to try minifier at different stages. Possible values are {None, "aot", "dynamo"}
# None - Minifier is switched off
# dynamo - Runs minifier on the TorchDynamo produced graphs, if compilation fails
# aot - Runs minifier on the Aot Autograd produced graphs, if compilation fails
# [@compile_ignored: debug]
repro_after = os.environ.get("TORCHDYNAMO_REPRO_AFTER", None)

# Compiler compilation debug info
# 1: Dumps the original graph out to repro.py if compilation fails
# 2: Dumps a minifier_launcher.py if compilation fails.
# 3: Always dumps a minifier_launcher.py. Good for segfaults.
# 4: Dumps a minifier_launcher.py if the accuracy fails.
# [@compile_ignored: debug]
repro_level = int(os.environ.get("TORCHDYNAMO_REPRO_LEVEL", 2))

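# Illustrative way to turn the minifier on from the shell (a sketch; the env
# vars are the ones read above, the script name is made up):
#
#     TORCHDYNAMO_REPRO_AFTER="aot" TORCHDYNAMO_REPRO_LEVEL=4 python my_script.py
#
# The same effect can be had in Python before compiling:
#
#     torch._dynamo.config.repro_after = "aot"
#     torch._dynamo.config.repro_level = 4
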
# By default, we try to detect accuracy failure by running both forward
# and backward of a torchdynamo produced graph (if you are using repro_after
# 'dynamo'). This setting forces us to only test the forward graph and
# not the backward graph. This can be helpful if you're trying to debug
# an inference only problem, but the minifier seems to be choking on the
# backwards step
# TODO: Detect this situation automatically so the user doesn't need
# to manually configure this
# [@compile_ignored: debug]
repro_forward_only = os.environ.get("TORCHDYNAMO_REPRO_FORWARD_ONLY") == "1"

# The tolerance we should use when testing if a compiled graph
# has diverged so that we should treat it as an accuracy failure
# [@compile_ignored: debug]
repro_tolerance = 1e-3

# Whether to ignore non-floating point values when checking accuracy.
# Checking accuracy of non-floating point values such as boolean tensors
# can lead to false positives.
# [@compile_ignored: debug]
repro_ignore_non_fp = os.environ.get("TORCHDYNAMO_REPRO_IGNORE_NON_FP") == "1"

# If True, when testing if two models are the same, we will test them against
# a third fp64 reference and only report a problem if the RMSE relative to the
# fp64 is greater. However, this will use more memory; you may disable this
# if memory usage is too high.
# [@compile_ignored: runtime_behaviour]
same_two_models_use_fp64 = True

# Not all backends support scalars. Some calls on torch.Tensor (like .item()) return a scalar type.
# When this flag is set to False, we introduce a graph break instead of capturing.
# This requires dynamic_shapes to be True.
capture_scalar_outputs = os.environ.get("TORCHDYNAMO_CAPTURE_SCALAR_OUTPUTS") == "1"

# Not all backends support operators that have dynamic output shape (e.g.,
# nonzero, unique). When this flag is set to False, we introduce a graph
# break instead of capturing. This requires dynamic_shapes to be True.
# If you set this to True, you probably also want capture_scalar_outputs
# (these are separated for historical reasons).
capture_dynamic_output_shape_ops = (
    os.environ.get("TORCHDYNAMO_CAPTURE_DYNAMIC_OUTPUT_SHAPE_OPS", "0") == "1"
)

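# Illustrative effect of the two capture flags (a sketch; the function is made
# up, and torch._dynamo.config.patch is the patcher installed on this module):
#
#     import torch
#
#     @torch.compile(fullgraph=True)
#     def f(x):
#         n = x.sum().item()        # scalar output: graph break / error unless
#                                   # capture_scalar_outputs=True
#         return torch.nonzero(x)   # dynamic output shape: graph break / error
#                                   # unless capture_dynamic_output_shape_ops=True
#
#     with torch._dynamo.config.patch(
#         capture_scalar_outputs=True,
#         capture_dynamic_output_shape_ops=True,
#     ):
#         f(torch.randn(4, 4))
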
# hybrid backed unbacked symints
prefer_deferred_runtime_asserts_over_guards = False

# For complex dynamic shapes guards that we're unable to specify with dynamo/export's
# range constraints + dims + derived dims language, we raise constraint violation
# errors or specialize by default. If set to True, this flag avoids crashing/specialization,
# and allows complex guards as runtime assertions in the graph.
_allow_complex_guards_as_runtime_asserts = False

# By default, dynamo will treat all ints as backed SymInts, which means (1) it
# will wait to see the int change over multiple runs before generalizing and
# (2) it will still always 0/1 specialize an int. When true, this knob
# forces dynamo to treat _length_per_key and _offset_per_key on
# KeyedJaggedTensor from torchrec as size-like unbacked SymInts, so that
# they (1) generalize immediately and (2) unsoundly never compare equal to
# 0/1. This is not on by default as AOTAutograd/Inductor cannot currently
# compile this code; however, this can be useful for export.
force_unspec_int_unbacked_size_like_on_torchrec_kjt = False

# Should almost always be true in prod. This relaxes the requirement that cond's true_fn and
# false_fn produce code with identical guards.
enforce_cond_guards_match = True

# Specify how to optimize a compiled DDP module. The flag accepts a boolean
# value or a string. There are 4 modes.
# 1. "ddp_optimizer" (or True): with "ddp_optimizer", Dynamo will automatically
# split the model graph into pieces to match DDP bucket sizes to allow DDP
# comm/compute overlap.
# 2. "python_reducer" (experimental): this optimization requires the usage
# of compiled_autograd. With "python_reducer", DDP will disable the C++ reducer
# and use the Python reducer to allow compiled_autograd to trace the
# communication and allow comm/compute overlap without graph-breaks.
# 3. "python_reducer_without_compiled_forward" (experimental): this mode is
# similar to "python_reducer". One should only use this optimization mode
# when compiled_autograd is used but the DDP module is not compiled.
# 4. "no_optimization" (or False): Dynamo won't split the model graph, nor
# will the Python reducer be used. With this mode, there will be no graph-breaks
# and the original DDP C++ reducer will be used. There will be no comm/compute
# overlap. This mode CANNOT be used with compiled_autograd.
# Note that to avoid breaking existing usage, mode 1 and mode 4 can be
# specified with a boolean value: True means ddp_optimizer and False means
# no optimization.
optimize_ddp: Union[bool, str] = True

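# Illustrative selection of a DDP optimization mode (a sketch; the model and
# the process-group setup are assumed to exist elsewhere):
#
#     import torch
#     from torch.nn.parallel import DistributedDataParallel as DDP
#
#     torch._dynamo.config.optimize_ddp = "ddp_optimizer"  # or True / False /
#                                                          # one of the strings above
#     ddp_model = DDP(model)
#     compiled_model = torch.compile(ddp_model)
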
# By default, Dynamo emits runtime asserts (e.g. torch._check, torch._check_is_size) in the graph.
# In some cases those asserts could be costly for performance.
# E.g. torch._check(tensor[0].item() > 2) for a tensor on cuda will require a cuda sync.
# Setting this to True keeps them as hints for the symbolic shapes engine,
# but they are not emitted in the graph.
do_not_emit_runtime_asserts: bool = (
    os.environ.get("TORCH_DYNAMO_DO_NOT_EMIT_RUNTIME_ASSERTS", "0") == "1"
)

_ddp_optimization_mode = [
    "ddp_optimizer",
    "python_reducer",  # experimental mode
    "python_reducer_without_compiled_forward",  # experimental mode
    "no_optimization",
]

def _get_optimize_ddp_mode():
    m = sys.modules[__name__]
    if isinstance(m.optimize_ddp, bool):
        if m.optimize_ddp:
            mode = "ddp_optimizer"
        else:
            mode = "no_optimization"
    elif isinstance(m.optimize_ddp, str):
        mode = m.optimize_ddp
    else:
        raise ValueError(f"Invalid type, {type(optimize_ddp)=}")

    assert mode in m._ddp_optimization_mode, f"Invalid mode {mode=}"
    return mode

# If True, delays DDPOptimizer submodule compilation to the first run of the model,
# so that real tensor strides are used in all submodules
# (instead of FakeTensor strides, which can differ from real tensor strides and cause errors in some cases).
# This feature is not hardened yet and is known to cause issues for some models, so it is False by default.
optimize_ddp_lazy_compile = False

# Whether to skip guarding on FSDP-managed modules
skip_fsdp_guards = True

# Whether to apply torch._dynamo.disable() to per-param FSDP hooks
skip_fsdp_hooks = False

# Make dynamo skip guarding on hooks on nn modules
# Note: unsafe: if your model actually has hooks and you remove them, or doesn't and you add them,
# dynamo will not notice and will execute whichever version you first compiled.
skip_nnmodule_hook_guards = True

# If True, raises an exception if TorchDynamo is called with a context manager
raise_on_ctx_manager_usage = True

# If True, raise when aot autograd is unsafe to use
raise_on_unsafe_aot_autograd = False

# If true, error if you torch.jit.trace over a dynamo-optimized function.
# If false, silently suppress dynamo.
error_on_nested_jit_trace = True

# If true, error with a better message if we symbolically trace over a
# dynamo-optimized function. If false, silently suppress dynamo.
error_on_nested_fx_trace = True

# Disables graph breaking on rnn. YMMV with backends.
allow_rnn = False

# If true, error if we try to compile a function that has
# been seen before.
# [@compile_ignored: runtime_behaviour]
error_on_recompile = False

# [@compile_ignored: debug] Whether to report any guard failures (deprecated: does not do anything)
report_guard_failures = True

# [@compile_ignored: debug] root folder of the project
base_dir = dirname(dirname(dirname(abspath(__file__))))

# Trace through NumPy or graphbreak
trace_numpy = True

# Trace through torch.distributed code
trace_distributed = False

# Default NumPy dtypes when tracing with torch.compile
# We default to 64 bits. For efficiency, one may want to change these to float32
numpy_default_float = "float64"
numpy_default_complex = "complex128"
numpy_default_int = "int64"

# use numpy's PRNG if True, pytorch otherwise
use_numpy_random_stream = False

# Use C++ guard manager
enable_cpp_guard_manager = os.environ.get("TORCHDYNAMO_CPP_GUARD_MANAGER", "1") == "1"

# Inline inbuilt nn modules
inline_inbuilt_nn_modules = (
    os.environ.get("TORCHDYNAMO_INLINE_INBUILT_NN_MODULES", "0") == "1"
)

def default_debug_dir_root():
    # [@compile_ignored: debug]
    DEBUG_DIR_VAR_NAME = "TORCH_COMPILE_DEBUG_DIR"
    if DEBUG_DIR_VAR_NAME in os.environ:
        return os.path.join(os.environ[DEBUG_DIR_VAR_NAME], "torch_compile_debug")
    elif is_fbcode():
        return os.path.join(
            tempfile.gettempdir(), getpass.getuser(), "torch_compile_debug"
        )
    else:
        return os.path.join(os.getcwd(), "torch_compile_debug")


# [@compile_ignored: debug]
debug_dir_root = default_debug_dir_root()

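# Illustrative override of the debug artifact location (a sketch; the path and
# script name are made up, the env var is the one read above):
#
#     TORCH_COMPILE_DEBUG_DIR=/tmp/my_debug_dir python my_script.py
#
# which places debug artifacts under /tmp/my_debug_dir/torch_compile_debug
# instead of the current working directory.
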
# [@compile_ignored: debug]
_save_config_ignore = {
    "repro_after",
    "repro_level",
    # workaround: "cannot pickle PyCapsule"
    "constant_functions",
    # workaround: "cannot pickle module"
    "skipfiles_inline_module_allowlist",
}

# For backend="cudagraphs": whether mutations on inputs are sent to the cudagraph backend
# or replayed in the aot_autograd epilogue. The default is False because mutation on inputs
# can prevent cudagraphing.
cudagraph_backend_keep_input_mutation = False

# enable cudagraph support for mutated inputs from prior cudagraph pool
cudagraph_backend_support_input_mutation = False

# When True, only ops that have the torch.Tag.pt2_compliant tag
# will be allowed into the graph; all other ops will be disallowed
# and will fall back to eager-mode PyTorch. Useful to ensure
# correctness of custom ops.
only_allow_pt2_compliant_ops = False

capture_autograd_function = True

# enable/disable dynamo tracing for `torch.func` transforms
capture_func_transforms = True

# Whether to log Dynamo compilation metrics into log files (for OSS) and Scuba tables (for fbcode).
log_compilation_metrics = True

# A set of logging functions which will be reordered to the end of graph breaks,
# allowing dynamo to construct larger graphs. Note that there are some
# limitations to this, such as how it does not correctly print objects that were
# mutated after the print statement.
reorderable_logging_functions: Set[Callable[[Any], None]] = set()

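# Illustrative registration of a reorderable logging function (a sketch; print
# is just one example of a callable you might register):
#
#     import torch
#
#     torch._dynamo.config.reorderable_logging_functions.add(print)
#
# With this, a print(...) inside a compiled function can be deferred to the end
# of the captured region instead of forcing a graph break at the call site,
# subject to the limitations noted above.
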
# simulates what would happen if we didn't have support for BUILD_SET opcode,
# used for testing
inject_BUILD_SET_unimplemented_TESTING_ONLY = False

_autograd_backward_strict_mode_banned_ops = [
    "stride",
    "requires_grad",
    "storage_offset",
    "layout",
    "data",
]

_autograd_backward_strict_mode_banned_ops.extend(
    [name for name, _ in inspect.getmembers(torch.Tensor) if re.match(r"^is_.*", name)]
)

# Enables caching of dispatches to fake tensors.
fake_tensor_cache_enabled = (
    os.environ.get("TORCH_FAKE_TENSOR_DISPATCH_CACHE", "1") == "1"
)

# Enables cross checking between the fake tensor cache and dispatch.
fake_tensor_cache_crosscheck_enabled = (
    os.environ.get("TORCH_FAKE_TENSOR_DISPATCH_CACHE_CROSSCHECK", "0") == "1"
)

# support `context_fn` in torch.utils.checkpoint.checkpoint API under torch.compile().
# WARNING: this is an experimental flag and is subject to change.
_experimental_support_context_fn_in_torch_utils_checkpoint = False

# Enables the Compiled Autograd engine to trace .backward() calls made under torch.compile().
# Note: AOT Autograd will still trace joint graphs.
compiled_autograd = False

if TYPE_CHECKING:
    from torch.utils._config_typing import *  # noqa: F401, F403

    def _make_closure_patcher(**changes):
        ...


from torch.utils._config_module import install_config_module

install_config_module(sys.modules[__name__])