__init__.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. # mypy: allow-untyped-defs
  2. import contextlib
  3. from typing import Union
  4. from typing_extensions import deprecated
  5. import torch
# Public API of this module: the names exported by ``from <module> import *``.
__all__ = [
    "is_built",
    "cuFFTPlanCacheAttrContextProp",
    "cuFFTPlanCache",
    "cuFFTPlanCacheManager",
    "cuBLASModule",
    "preferred_linalg_library",
    "preferred_blas_library",
    "cufft_plan_cache",
    "matmul",
    "SDPAParams",
    "enable_cudnn_sdp",
    "cudnn_sdp_enabled",
    "enable_flash_sdp",
    "flash_sdp_enabled",
    "enable_mem_efficient_sdp",
    "mem_efficient_sdp_enabled",
    "math_sdp_enabled",
    "enable_math_sdp",
    "can_use_flash_attention",
    "can_use_efficient_attention",
    "sdp_kernel",
]
  29. def is_built():
  30. r"""
  31. Return whether PyTorch is built with CUDA support.
  32. Note that this doesn't necessarily mean CUDA is available; just that if this PyTorch
  33. binary were run on a machine with working CUDA drivers and devices, we would be able to use it.
  34. """
  35. return torch._C._has_cuda
  36. class cuFFTPlanCacheAttrContextProp:
  37. # Like regular ContextProp, but uses the `.device_index` attribute from the
  38. # calling object as the first argument to the getter and setter.
  39. def __init__(self, getter, setter):
  40. self.getter = getter
  41. self.setter = setter
  42. def __get__(self, obj, objtype):
  43. return self.getter(obj.device_index)
  44. def __set__(self, obj, val):
  45. if isinstance(self.setter, str):
  46. raise RuntimeError(self.setter)
  47. self.setter(obj.device_index, val)
  48. class cuFFTPlanCache:
  49. r"""
  50. Represent a specific plan cache for a specific `device_index`.
  51. The attributes `size` and `max_size`, and method `clear`, can fetch and/ or
  52. change properties of the C++ cuFFT plan cache.
  53. """
  54. def __init__(self, device_index):
  55. self.device_index = device_index
  56. size = cuFFTPlanCacheAttrContextProp(
  57. torch._cufft_get_plan_cache_size,
  58. ".size is a read-only property showing the number of plans currently in the "
  59. "cache. To change the cache capacity, set cufft_plan_cache.max_size.",
  60. )
  61. max_size = cuFFTPlanCacheAttrContextProp(
  62. torch._cufft_get_plan_cache_max_size, torch._cufft_set_plan_cache_max_size
  63. )
  64. def clear(self):
  65. return torch._cufft_clear_plan_cache(self.device_index)
  66. class cuFFTPlanCacheManager:
  67. r"""
  68. Represent all cuFFT plan caches, return the cuFFTPlanCache for a given device when indexed.
  69. Finally, this object, when used directly as a `cuFFTPlanCache` object (e.g.,
  70. setting the `.max_size`) attribute, the current device's cuFFT plan cache is
  71. used.
  72. """
  73. __initialized = False
  74. def __init__(self):
  75. self.caches = []
  76. self.__initialized = True
  77. def __getitem__(self, device):
  78. index = torch.cuda._utils._get_device_index(device)
  79. if index < 0 or index >= torch.cuda.device_count():
  80. raise RuntimeError(
  81. f"cufft_plan_cache: expected 0 <= device index < {torch.cuda.device_count()}, but got "
  82. f"device with index {index}"
  83. )
  84. if len(self.caches) == 0:
  85. self.caches.extend(
  86. cuFFTPlanCache(index) for index in range(torch.cuda.device_count())
  87. )
  88. return self.caches[index]
  89. def __getattr__(self, name):
  90. return getattr(self[torch.cuda.current_device()], name)
  91. def __setattr__(self, name, value):
  92. if self.__initialized:
  93. return setattr(self[torch.cuda.current_device()], name, value)
  94. else:
  95. return super().__setattr__(name, value)
  96. class cuBLASModule:
  97. def __getattr__(self, name):
  98. if name == "allow_tf32":
  99. return torch._C._get_cublas_allow_tf32()
  100. elif name == "allow_fp16_reduced_precision_reduction":
  101. return torch._C._get_cublas_allow_fp16_reduced_precision_reduction()
  102. elif name == "allow_bf16_reduced_precision_reduction":
  103. return torch._C._get_cublas_allow_bf16_reduced_precision_reduction()
  104. raise AttributeError("Unknown attribute " + name)
  105. def __setattr__(self, name, value):
  106. if name == "allow_tf32":
  107. return torch._C._set_cublas_allow_tf32(value)
  108. elif name == "allow_fp16_reduced_precision_reduction":
  109. return torch._C._set_cublas_allow_fp16_reduced_precision_reduction(value)
  110. elif name == "allow_bf16_reduced_precision_reduction":
  111. return torch._C._set_cublas_allow_bf16_reduced_precision_reduction(value)
  112. raise AttributeError("Unknown attribute " + name)
  113. _LinalgBackends = {
  114. "default": torch._C._LinalgBackend.Default,
  115. "cusolver": torch._C._LinalgBackend.Cusolver,
  116. "magma": torch._C._LinalgBackend.Magma,
  117. }
  118. _LinalgBackends_str = ", ".join(_LinalgBackends.keys())
  119. def preferred_linalg_library(
  120. backend: Union[None, str, torch._C._LinalgBackend] = None
  121. ) -> torch._C._LinalgBackend:
  122. r"""
  123. Override the heuristic PyTorch uses to choose between cuSOLVER and MAGMA for CUDA linear algebra operations.
  124. .. warning:: This flag is experimental and subject to change.
  125. When PyTorch runs a CUDA linear algebra operation it often uses the cuSOLVER or MAGMA libraries,
  126. and if both are available it decides which to use with a heuristic.
  127. This flag (a :class:`str`) allows overriding those heuristics.
  128. * If `"cusolver"` is set then cuSOLVER will be used wherever possible.
  129. * If `"magma"` is set then MAGMA will be used wherever possible.
  130. * If `"default"` (the default) is set then heuristics will be used to pick between
  131. cuSOLVER and MAGMA if both are available.
  132. * When no input is given, this function returns the currently preferred library.
  133. * User may use the environment variable TORCH_LINALG_PREFER_CUSOLVER=1 to set the preferred library to cuSOLVER
  134. globally.
  135. This flag only sets the initial value of the preferred library and the preferred library
  136. may still be overridden by this function call later in your script.
  137. Note: When a library is preferred other libraries may still be used if the preferred library
  138. doesn't implement the operation(s) called.
  139. This flag may achieve better performance if PyTorch's heuristic library selection is incorrect
  140. for your application's inputs.
  141. Currently supported linalg operators:
  142. * :func:`torch.linalg.inv`
  143. * :func:`torch.linalg.inv_ex`
  144. * :func:`torch.linalg.cholesky`
  145. * :func:`torch.linalg.cholesky_ex`
  146. * :func:`torch.cholesky_solve`
  147. * :func:`torch.cholesky_inverse`
  148. * :func:`torch.linalg.lu_factor`
  149. * :func:`torch.linalg.lu`
  150. * :func:`torch.linalg.lu_solve`
  151. * :func:`torch.linalg.qr`
  152. * :func:`torch.linalg.eigh`
  153. * :func:`torch.linalg.eighvals`
  154. * :func:`torch.linalg.svd`
  155. * :func:`torch.linalg.svdvals`
  156. """
  157. if backend is None:
  158. pass
  159. elif isinstance(backend, str):
  160. if backend not in _LinalgBackends:
  161. raise RuntimeError(
  162. "Unknown input value. " f"Choose from: {_LinalgBackends_str}."
  163. )
  164. torch._C._set_linalg_preferred_backend(_LinalgBackends[backend])
  165. elif isinstance(backend, torch._C._LinalgBackend):
  166. torch._C._set_linalg_preferred_backend(backend)
  167. else:
  168. raise RuntimeError("Unknown input value type.")
  169. return torch._C._get_linalg_preferred_backend()
  170. _BlasBackends = {
  171. "cublas": torch._C._BlasBackend.Cublas,
  172. "cublaslt": torch._C._BlasBackend.Cublaslt,
  173. "hipblaslt": torch._C._BlasBackend.Cublaslt, # alias
  174. }
  175. _BlasBackends_str = ", ".join(_BlasBackends.keys())
  176. def preferred_blas_library(
  177. backend: Union[None, str, torch._C._BlasBackend] = None
  178. ) -> torch._C._BlasBackend:
  179. r"""
  180. Override the library PyTorch uses for BLAS operations. Choose between cuBLAS and cuBLASLt.
  181. .. warning:: This flag is experimental and subject to change.
  182. When PyTorch runs a CUDA BLAS operation it defaults to cuBLAS even if both cuBLAS and cuBLASLt are available.
  183. For PyTorch built for ROCm, hipBLAS and hipBLASLt may offer different performance.
  184. This flag (a :class:`str`) allows overriding which BLAS library to use.
  185. * If `"cublas"` is set then cuBLAS will be used wherever possible.
  186. * If `"cublaslt"` is set then cuBLASLt will be used wherever possible.
  187. * When no input is given, this function returns the currently preferred library.
  188. * User may use the environment variable TORCH_BLAS_PREFER_CUBLASLT=1 to set the preferred library to cuBLASLt
  189. globally.
  190. This flag only sets the initial value of the preferred library and the preferred library
  191. may still be overridden by this function call later in your script.
  192. Note: When a library is preferred other libraries may still be used if the preferred library
  193. doesn't implement the operation(s) called.
  194. This flag may achieve better performance if PyTorch's library selection is incorrect
  195. for your application's inputs.
  196. """
  197. if backend is None:
  198. pass
  199. elif isinstance(backend, str):
  200. if backend not in _BlasBackends:
  201. raise RuntimeError(
  202. "Unknown input value. " f"Choose from: {_BlasBackends_str}."
  203. )
  204. torch._C._set_blas_preferred_backend(_BlasBackends[backend])
  205. elif isinstance(backend, torch._C._BlasBackend):
  206. torch._C._set_blas_preferred_backend(backend)
  207. else:
  208. raise RuntimeError("Unknown input value type.")
  209. return torch._C._get_blas_preferred_backend()
  210. from torch._C import _SDPAParams as SDPAParams, _SDPBackend as SDPBackend
# Overwrite the class metadata of the imported ``_SDPAParams`` so that it
# reports its module as ``torch.backends.cuda`` and its name as ``SDPAParams``,
# matching the public alias it is exposed under here.
SDPAParams.__module__ = "torch.backends.cuda"
SDPAParams.__name__ = "SDPAParams"
  214. def flash_sdp_enabled():
  215. r"""
  216. .. warning:: This flag is beta and subject to change.
  217. Returns whether flash scaled dot product attention is enabled or not.
  218. """
  219. return torch._C._get_flash_sdp_enabled()
  220. def enable_flash_sdp(enabled: bool):
  221. r"""
  222. .. warning:: This flag is beta and subject to change.
  223. Enables or disables flash scaled dot product attention.
  224. """
  225. torch._C._set_sdp_use_flash(enabled)
  226. def mem_efficient_sdp_enabled():
  227. r"""
  228. .. warning:: This flag is beta and subject to change.
  229. Returns whether memory efficient scaled dot product attention is enabled or not.
  230. """
  231. return torch._C._get_mem_efficient_sdp_enabled()
  232. def enable_mem_efficient_sdp(enabled: bool):
  233. r"""
  234. .. warning:: This flag is beta and subject to change.
  235. Enables or disables memory efficient scaled dot product attention.
  236. """
  237. torch._C._set_sdp_use_mem_efficient(enabled)
  238. def math_sdp_enabled():
  239. r"""
  240. .. warning:: This flag is beta and subject to change.
  241. Returns whether math scaled dot product attention is enabled or not.
  242. """
  243. return torch._C._get_math_sdp_enabled()
  244. def enable_math_sdp(enabled: bool):
  245. r"""
  246. .. warning:: This flag is beta and subject to change.
  247. Enables or disables math scaled dot product attention.
  248. """
  249. torch._C._set_sdp_use_math(enabled)
  250. def can_use_flash_attention(params: SDPAParams, debug: bool = False) -> bool:
  251. r"""Check if FlashAttention can be utilized in scaled_dot_product_attention.
  252. Args:
  253. params: An instance of SDPAParams containing the tensors for query,
  254. key, value, an optional attention mask, dropout rate, and
  255. a flag indicating if the attention is causal.
  256. debug: Whether to logging.warn debug information as to why FlashAttention could not be run.
  257. Defaults to False.
  258. Returns:
  259. True if FlashAttention can be used with the given parameters; otherwise, False.
  260. Note:
  261. This function is dependent on a CUDA-enabled build of PyTorch. It will return False
  262. in non-CUDA environments.
  263. """
  264. return torch._C._can_use_flash_attention(params, debug)
  265. def can_use_efficient_attention(params: SDPAParams, debug: bool = False) -> bool:
  266. r"""Check if efficient_attention can be utilized in scaled_dot_product_attention.
  267. Args:
  268. params: An instance of SDPAParams containing the tensors for query,
  269. key, value, an optional attention mask, dropout rate, and
  270. a flag indicating if the attention is causal.
  271. debug: Whether to logging.warn with information as to why efficient_attention could not be run.
  272. Defaults to False.
  273. Returns:
  274. True if efficient_attention can be used with the given parameters; otherwise, False.
  275. Note:
  276. This function is dependent on a CUDA-enabled build of PyTorch. It will return False
  277. in non-CUDA environments.
  278. """
  279. return torch._C._can_use_mem_efficient_attention(params, debug)
  280. def cudnn_sdp_enabled():
  281. r"""
  282. .. warning:: This flag is beta and subject to change.
  283. Returns whether cuDNN scaled dot product attention is enabled or not.
  284. """
  285. return torch._C._get_cudnn_sdp_enabled()
  286. def enable_cudnn_sdp(enabled: bool):
  287. r"""
  288. .. warning:: This flag is beta and subject to change.
  289. Enables or disables cuDNN scaled dot product attention.
  290. """
  291. torch._C._set_sdp_use_cudnn(enabled)
  292. @contextlib.contextmanager
  293. @deprecated(
  294. (
  295. "`torch.backends.cuda.sdp_kernel()` is deprecated. "
  296. "In the future, this context manager will be removed. "
  297. "Please see `torch.nn.attention.sdpa_kernel()` for the new context manager, "
  298. "with updated signature."
  299. ),
  300. category=FutureWarning,
  301. )
  302. def sdp_kernel(
  303. enable_flash: bool = True,
  304. enable_math: bool = True,
  305. enable_mem_efficient: bool = True,
  306. enable_cudnn: bool = True,
  307. ):
  308. r"""
  309. .. warning:: This flag is beta and subject to change.
  310. This context manager can be used to temporarily enable or disable any of the three backends for scaled dot product attention.
  311. Upon exiting the context manager, the previous state of the flags will be restored.
  312. """
  313. from torch.nn.attention import sdpa_kernel
  314. backend_list = []
  315. if enable_flash:
  316. backend_list.append(SDPBackend.FLASH_ATTENTION)
  317. if enable_mem_efficient:
  318. backend_list.append(SDPBackend.EFFICIENT_ATTENTION)
  319. if enable_math:
  320. backend_list.append(SDPBackend.MATH)
  321. if enable_cudnn:
  322. backend_list.append(SDPBackend.CUDNN_ATTENTION)
  323. with sdpa_kernel(backend_list) as context:
  324. try:
  325. yield context
  326. finally:
  327. pass
# Module-level singletons exposed as part of the public API (both names are
# listed in ``__all__``): the cuFFT plan cache manager and the cuBLAS flag
# accessor.
cufft_plan_cache = cuFFTPlanCacheManager()
matmul = cuBLASModule()