ApproximateClock.h 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. // Copyright 2023-present Facebook. All Rights Reserved.
  2. #pragma once
  3. #include <c10/macros/Export.h>
  4. #include <array>
  5. #include <chrono>
  6. #include <cstddef>
  7. #include <cstdint>
  8. #include <ctime>
  9. #include <functional>
  10. #include <type_traits>
  11. #if defined(C10_IOS) && defined(C10_MOBILE)
  12. #include <sys/time.h> // for gettimeofday()
  13. #endif
  14. #if defined(__i386__) || defined(__x86_64__) || defined(__amd64__)
  15. #define C10_RDTSC
  16. #if defined(_MSC_VER)
  17. #include <intrin.h>
  18. #elif defined(__CUDACC__) || defined(__HIPCC__)
  19. #undef C10_RDTSC
  20. #elif defined(__clang__)
  // `__rdtsc` is available by default on Clang, so no extra include is needed.
  // NB: This check has to come before the `__GNUC__` one, because Clang also
  // defines `__GNUC__`.
  23. #elif defined(__GNUC__)
  24. #include <x86intrin.h>
  25. #else
  26. #undef C10_RDTSC
  27. #endif
  28. #endif
  29. namespace c10 {
  // Timestamps in this header are carried as signed 64-bit integers. Inside
  // this namespace `time_t` names that integer type rather than the C library's
  // `::time_t`.
  using time_t = std::int64_t;

  // A clock that is guaranteed to be steady: `high_resolution_clock` when the
  // implementation marks it steady, otherwise `steady_clock`.
  using steady_clock_t = std::conditional<
      std::chrono::high_resolution_clock::is_steady,
      std::chrono::high_resolution_clock,
      std::chrono::steady_clock>::type;
  // Returns the `std::chrono::system_clock` reading as a count of nanoseconds
  // since that clock's epoch.
  inline time_t getTimeSinceEpoch() {
    using std::chrono::nanoseconds;
    using std::chrono::system_clock;

    const auto since_epoch = system_clock::now().time_since_epoch();
    const auto as_nanos = std::chrono::duration_cast<nanoseconds>(since_epoch);
    return as_nanos.count();
  }
  // Returns the current time in nanoseconds.
  //
  // Which clock is read depends on the platform:
  //   * iOS mobile builds: `gettimeofday()`, i.e. a microsecond-granularity
  //     reading scaled up to nanoseconds.
  //   * Windows / Mach: `steady_clock_t`, a steady clock whose epoch is
  //     implementation-defined (so not necessarily the Unix epoch).
  //   * Everything else: `clock_gettime()` with `CLOCK_REALTIME`, or with
  //     `CLOCK_MONOTONIC` when `allow_monotonic` is true.
  //
  // `allow_monotonic` is only consulted on the `clock_gettime()` path; the other
  // two paths ignore it.
  inline time_t getTime(bool allow_monotonic = false) {
  #if defined(C10_IOS) && defined(C10_MOBILE)
    // clock_gettime is only available on iOS 10.0 or newer. Unlike OS X, iOS
    // can't rely on CLOCK_REALTIME, as it is defined no matter if clock_gettime
    // is implemented or not
    (void)allow_monotonic; // not applicable to gettimeofday()
    struct timeval now {};
    gettimeofday(&now, nullptr);
    return static_cast<time_t>(now.tv_sec) * 1000000000 +
        static_cast<time_t>(now.tv_usec) * 1000;
  #elif defined(_WIN32) || defined(__MACH__)
    (void)allow_monotonic; // steady_clock_t is used unconditionally here
    return std::chrono::duration_cast<std::chrono::nanoseconds>(
               steady_clock_t::now().time_since_epoch())
        .count();
  #else
    // clock_gettime is *much* faster than std::chrono implementation on Linux
    const auto clock_id = allow_monotonic ? CLOCK_MONOTONIC : CLOCK_REALTIME;
    // Zero-initialised, so if clock_gettime fails (its status is not checked)
    // the function returns 0 instead of reading indeterminate memory.
    struct timespec t {};
    clock_gettime(clock_id, &t);
    return static_cast<time_t>(t.tv_sec) * 1000000000 +
        static_cast<time_t>(t.tv_nsec);
  #endif
  }
  64. // We often do not need to capture true wall times. If a fast mechanism such
  65. // as TSC is available we can use that instead and convert back to epoch time
  66. // during post processing. This greatly reduce the clock's contribution to
  67. // profiling.
  68. // http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/
  69. // https://quick-bench.com/q/r8opkkGZSJMu9wM_XTbDouq-0Io
  70. // TODO: We should use
  71. // `https://github.com/google/benchmark/blob/main/src/cycleclock.h`
  72. inline auto getApproximateTime() {
  73. #if defined(C10_RDTSC)
  74. return static_cast<uint64_t>(__rdtsc());
  75. #else
  76. return getTime();
  77. #endif
  78. }
  79. using approx_time_t = decltype(getApproximateTime());
  80. static_assert(
  81. std::is_same_v<approx_time_t, int64_t> ||
  82. std::is_same_v<approx_time_t, uint64_t>,
  83. "Expected either int64_t (`getTime`) or uint64_t (some TSC reads).");
  84. // Convert `getCount` results to Nanoseconds since unix epoch.
  85. class C10_API ApproximateClockToUnixTimeConverter final {
  86. public:
  87. ApproximateClockToUnixTimeConverter();
  88. std::function<time_t(approx_time_t)> makeConverter();
  89. struct UnixAndApproximateTimePair {
  90. time_t t_;
  91. approx_time_t approx_t_;
  92. };
  93. static UnixAndApproximateTimePair measurePair();
  94. private:
  95. static constexpr size_t replicates = 1001;
  96. using time_pairs = std::array<UnixAndApproximateTimePair, replicates>;
  97. time_pairs measurePairs();
  98. time_pairs start_times_;
  99. };
  100. } // namespace c10