profiler.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. # mypy: allow-untyped-defs
  2. import contextlib
  3. import tempfile
  4. import torch
  5. from . import check_error, cudart
  6. __all__ = ["init", "start", "stop", "profile"]
  7. DEFAULT_FLAGS = [
  8. "gpustarttimestamp",
  9. "gpuendtimestamp",
  10. "gridsize3d",
  11. "threadblocksize",
  12. "streamid",
  13. "enableonstart 0",
  14. "conckerneltrace",
  15. ]
  16. def init(output_file, flags=None, output_mode="key_value"):
  17. rt = cudart()
  18. if not hasattr(rt, "cudaOutputMode"):
  19. raise AssertionError("HIP does not support profiler initialization!")
  20. if (
  21. hasattr(torch.version, "cuda")
  22. and torch.version.cuda is not None
  23. and int(torch.version.cuda.split(".")[0]) >= 12
  24. ):
  25. # Check https://github.com/pytorch/pytorch/pull/91118
  26. # cudaProfilerInitialize is no longer needed after CUDA 12
  27. raise AssertionError("CUDA12+ does not need profiler initialization!")
  28. flags = DEFAULT_FLAGS if flags is None else flags
  29. if output_mode == "key_value":
  30. output_mode_enum = rt.cudaOutputMode.KeyValuePair
  31. elif output_mode == "csv":
  32. output_mode_enum = rt.cudaOutputMode.CSV
  33. else:
  34. raise RuntimeError(
  35. "supported CUDA profiler output modes are: key_value and csv"
  36. )
  37. with tempfile.NamedTemporaryFile(delete=True) as f:
  38. f.write(b"\n".join(f.encode("ascii") for f in flags))
  39. f.flush()
  40. check_error(rt.cudaProfilerInitialize(f.name, output_file, output_mode_enum))
  41. def start():
  42. check_error(cudart().cudaProfilerStart())
  43. def stop():
  44. check_error(cudart().cudaProfilerStop())
  45. @contextlib.contextmanager
  46. def profile():
  47. try:
  48. start()
  49. yield
  50. finally:
  51. stop()