_utils_internal.py

# mypy: allow-untyped-defs
import functools
import logging
import os
import sys
import tempfile
from typing import Any, Dict, Optional

import torch
from torch._strobelight.compile_time_profiler import StrobelightCompileTimeProfiler


log = logging.getLogger(__name__)

if os.environ.get("TORCH_COMPILE_STROBELIGHT", False):
    import shutil

    if not shutil.which("strobeclient"):
        log.info(
            "TORCH_COMPILE_STROBELIGHT is true, but seems like you are not on a FB machine."
        )
    else:
        log.info("Strobelight profiler is enabled via environment variable")
        StrobelightCompileTimeProfiler.enable()

# this arbitrary-looking assortment of functionality is provided here
# to have a central place for overrideable behavior. The motivating
# use is the FB build environment, where this source file is replaced
# by an equivalent.

if torch._running_with_deploy():
    # __file__ is meaningless in the context of frozen torch used in torch deploy.
    # setting empty torch_parent should allow below functions to operate without crashing,
    # but it's unclear if there is a valid use case for them in the context of deploy.
    torch_parent = ""
else:
    if os.path.basename(os.path.dirname(__file__)) == "shared":
        torch_parent = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    else:
        torch_parent = os.path.dirname(os.path.dirname(__file__))


def get_file_path(*path_components: str) -> str:
    return os.path.join(torch_parent, *path_components)


def get_file_path_2(*path_components: str) -> str:
    return os.path.join(*path_components)


def get_writable_path(path: str) -> str:
    if os.access(path, os.W_OK):
        return path
    return tempfile.mkdtemp(suffix=os.path.basename(path))


def prepare_multiprocessing_environment(path: str) -> None:
    pass


def resolve_library_path(path: str) -> str:
    return os.path.realpath(path)


def throw_abstract_impl_not_imported_error(opname, module, context):
    if module in sys.modules:
        raise NotImplementedError(
            f"{opname}: We could not find the fake impl for this operator. "
        )
    else:
        raise NotImplementedError(
            f"{opname}: We could not find the fake impl for this operator. "
            f"The operator specified that you may need to import the '{module}' "
            f"Python module to load the fake impl. {context}"
        )


# NB! This treats "skip" kwarg specially!!
def compile_time_strobelight_meta(phase_name):
    def compile_time_strobelight_meta_inner(function):
        @functools.wraps(function)
        def wrapper_function(*args, **kwargs):
            if "skip" in kwargs:
                kwargs["skip"] = kwargs["skip"] + 1

            return StrobelightCompileTimeProfiler.profile_compile_time(
                function, phase_name, *args, **kwargs
            )

        return wrapper_function

    return compile_time_strobelight_meta_inner
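

# Illustrative usage sketch (hypothetical function and phase name, not part of
# the original file): the decorator is intended for compile-entry points that
# accept a `skip` kwarg counting stack frames to omit from the profile; the
# wrapper bumps it by one to account for itself before handing off to
# StrobelightCompileTimeProfiler.profile_compile_time (which, when Strobelight
# profiling is not enabled, is expected to reduce to a plain call of the
# wrapped function).
@compile_time_strobelight_meta(phase_name="example_phase")
def _example_compile_step(gm, *, skip=0):
    # Hypothetical compile step; profiled under "example_phase" only when the
    # Strobelight profiler is enabled (FB-internal builds).
    return gm

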
# Meta only, see
# https://www.internalfb.com/intern/wiki/ML_Workflow_Observability/User_Guides/Adding_instrumentation_to_your_code/
#
# This will cause an event to get logged to Scuba via the signposts API. You
# can view samples at https://fburl.com/scuba/workflow_signpost/zh9wmpqs;
# we log to the "torch" subsystem, plus the category and name you provide here.
# Each of the arguments translates into a Scuba column. We're still figuring
# out local conventions in PyTorch, but category should be something like
# "dynamo" or "inductor", and name should be a specific string describing what
# kind of event happened.
#
# Killswitch is at
# https://www.internalfb.com/intern/justknobs/?name=pytorch%2Fsignpost#event
def signpost_event(category: str, name: str, parameters: Dict[str, Any]):
    log.info("%s %s: %r", category, name, parameters)
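

# Illustrative usage sketch (hypothetical event name and parameters, not part
# of the original file): callers pass a coarse category such as "dynamo" or
# "inductor", a specific event name, and a dict whose keys become Scuba columns
# in the FB-internal build; in OSS this just logs through the module logger.
def _example_signpost_usage() -> None:
    signpost_event(
        category="dynamo",
        name="example_graph_break",  # hypothetical event name
        parameters={"reason": "unsupported op", "frame_count": 3},
    )

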
def log_compilation_event(metrics):
    log.info("%s", metrics)


def upload_graph(graph):
    pass


def set_pytorch_distributed_envs_from_justknobs():
    pass


def log_export_usage(**kwargs):
    pass


def log_torchscript_usage(api: str):
    _ = api
    return


def check_if_torch_exportable():
    return False


def log_torch_jit_trace_exportability(
    api: str, type_of_export: str, export_outcome: str, result: str
):
    _, _, _, _ = api, type_of_export, export_outcome, result
    return


def export_api_rollout_check() -> bool:
    return False


def justknobs_check(name: str) -> bool:
    """
    This function can be used to killswitch functionality in FB prod,
    where you can toggle this value to False in JK without having to
    do a code push. In OSS, we always have everything turned on all
    the time, because downstream users can simply choose to not update
    PyTorch. (If more fine-grained enable/disable is needed, we could
    potentially have a map we look the name up in to toggle behavior. But
    the point is that it's all tied to source code in OSS, since there's
    no live server to query.)

    This is the bare minimum functionality I needed to do some killswitches.
    We have a more detailed plan at
    https://docs.google.com/document/d/1Ukerh9_42SeGh89J-tGtecpHBPwGlkQ043pddkKb3PU/edit
    In particular, in some circumstances it may be necessary to read in
    a knob once at process start, and then use it consistently for the
    rest of the process. Future functionality will codify these patterns
    into a better high level API.

    WARNING: Do NOT call this function at module import time, JK is not
    fork safe and you will break anyone who forks the process and then
    hits JK again.
    """
    return True


def justknobs_getval_int(name: str) -> int:
    """
    Read warning on justknobs_check
    """
    return 0
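

# Illustrative usage sketch (hypothetical knob name, not part of the original
# file): in the FB-internal build the knob can be flipped server-side to turn a
# feature off without a code push; in OSS both helpers are hard-coded (True / 0),
# so the guarded branch always runs. Per the warning above, call this lazily
# inside a function, never at module import time.
def _example_justknobs_gate() -> None:
    if justknobs_check("pytorch/compiler:example_feature"):  # hypothetical knob
        pass  # feature-gated work would go here

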
@functools.lru_cache(None)
def max_clock_rate():
    if not torch.version.hip:
        from triton.testing import nvsmi

        return nvsmi(["clocks.max.sm"])[0]
    else:
        # Manually set max-clock speeds on ROCm until equivalent nvsmi
        # functionality is available in triton.testing or via pyamdsmi
        # enablement. Required for test_snode_runtime unit tests.
        gcn_arch = str(torch.cuda.get_device_properties(0).gcnArchName.split(":", 1)[0])
        if "gfx94" in gcn_arch:
            return 1700
        elif "gfx90a" in gcn_arch:
            return 1700
        elif "gfx908" in gcn_arch:
            return 1502
        elif "gfx11" in gcn_arch:
            return 1700
        elif "gfx103" in gcn_arch:
            return 1967
        elif "gfx101" in gcn_arch:
            return 1144
        else:
            return 1100


TEST_MASTER_ADDR = "127.0.0.1"
TEST_MASTER_PORT = 29500

# USE_GLOBAL_DEPS controls whether __init__.py tries to load
# libtorch_global_deps, see Note [Global dependencies]
USE_GLOBAL_DEPS = True

# USE_RTLD_GLOBAL_WITH_LIBTORCH controls whether __init__.py tries to load
# _C.so with RTLD_GLOBAL during the call to dlopen.
USE_RTLD_GLOBAL_WITH_LIBTORCH = False

# If an op was defined in C++ and extended from Python using
# torch.library.register_fake, this controls whether we require that there be a
# m.set_python_module("mylib.ops") call from C++ that associates
# the C++ op with a python module.
REQUIRES_SET_PYTHON_MODULE = False
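
# Illustrative sketch (hypothetical op "mylib::foo", not part of the original
# file): when REQUIRES_SET_PYTHON_MODULE is True, a C++-defined op whose fake
# impl lives in Python is expected to declare m.set_python_module("mylib.ops")
# on the C++ side, so throw_abstract_impl_not_imported_error above can tell
# users which module to import. The Python side would then register the fake
# impl roughly like this (kept as a comment because "mylib::foo" does not
# exist here):
#
#   @torch.library.register_fake("mylib::foo")
#   def _(x):
#       return torch.empty_like(x)
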
def maybe_upload_prof_stats_to_manifold(profile_path: str) -> Optional[str]:
    print("Uploading profile stats (fb-only otherwise no-op)")
    return None