| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- // Copyright 2023-present Facebook. All Rights Reserved.
- #pragma once
- #include <c10/macros/Export.h>
- #include <array>
- #include <chrono>
- #include <cstddef>
- #include <cstdint>
- #include <ctime>
- #include <functional>
- #include <type_traits>
- #if defined(C10_IOS) && defined(C10_MOBILE)
- #include <sys/time.h> // for gettimeofday()
- #endif
- #if defined(__i386__) || defined(__x86_64__) || defined(__amd64__)
- #define C10_RDTSC
- #if defined(_MSC_VER)
- #include <intrin.h>
- #elif defined(__CUDACC__) || defined(__HIPCC__)
- #undef C10_RDTSC
- #elif defined(__clang__)
- // `__rdtsc` is available by default.
- // NB: This has to be first, because Clang will also define `__GNUC__`
- #elif defined(__GNUC__)
- #include <x86intrin.h>
- #else
- #undef C10_RDTSC
- #endif
- #endif
- namespace c10 {
// Signed 64-bit integer used for the timestamps returned by the clock helpers
// in this header.
using time_t = int64_t;

// A steady clock type: fall back to `std::chrono::steady_clock` unless
// `std::chrono::high_resolution_clock` is itself steady, in which case that
// clock is used.
using steady_clock_t = std::conditional_t<
    !std::chrono::high_resolution_clock::is_steady,
    std::chrono::steady_clock,
    std::chrono::high_resolution_clock>;
// Reads `std::chrono::system_clock` and returns the elapsed time since that
// clock's epoch, expressed as a nanosecond count.
inline time_t getTimeSinceEpoch() {
  using std::chrono::duration_cast;
  using std::chrono::nanoseconds;
  using std::chrono::system_clock;

  const auto since_epoch = system_clock::now().time_since_epoch();
  return duration_cast<nanoseconds>(since_epoch).count();
}
// Returns the current time as a nanosecond count.
//
// The clock that is read depends on the platform:
//  - iOS mobile builds (C10_IOS && C10_MOBILE): `gettimeofday`, so the value
//    only has microsecond resolution.
//  - Windows / Mach: `steady_clock_t`, measured from that clock's own epoch.
//  - Everywhere else: `clock_gettime` with CLOCK_REALTIME, or with
//    CLOCK_MONOTONIC when `allow_monotonic` is true.
//
// `allow_monotonic` is only consulted on the `clock_gettime` path; it is
// marked [[maybe_unused]] so the other branches do not trigger
// unused-parameter warnings.
inline time_t getTime([[maybe_unused]] bool allow_monotonic = false) {
#if defined(C10_IOS) && defined(C10_MOBILE)
  // clock_gettime is only available on iOS 10.0 or newer. Unlike OS X, iOS
  // can't rely on CLOCK_REALTIME, as it is defined no matter if clock_gettime
  // is implemented or not
  struct timeval now {};
  gettimeofday(&now, nullptr);
  // Seconds -> ns plus microseconds -> ns.
  return static_cast<time_t>(now.tv_sec) * 1000000000 +
      static_cast<time_t>(now.tv_usec) * 1000;
#elif defined(_WIN32) || defined(__MACH__)
  return std::chrono::duration_cast<std::chrono::nanoseconds>(
             steady_clock_t::now().time_since_epoch())
      .count();
#else
  // clock_gettime is *much* faster than std::chrono implementation on Linux
  struct timespec t {};
  const clockid_t mode = allow_monotonic ? CLOCK_MONOTONIC : CLOCK_REALTIME;
  // The return value is not checked; `t` is zero-initialized, so a failed
  // call yields 0.
  clock_gettime(mode, &t);
  return static_cast<time_t>(t.tv_sec) * 1000000000 +
      static_cast<time_t>(t.tv_nsec);
#endif
}
- // We often do not need to capture true wall times. If a fast mechanism such
- // as TSC is available we can use that instead and convert back to epoch time
- // during post processing. This greatly reduce the clock's contribution to
- // profiling.
- // http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/
- // https://quick-bench.com/q/r8opkkGZSJMu9wM_XTbDouq-0Io
- // TODO: We should use
- // `https://github.com/google/benchmark/blob/main/src/cycleclock.h`
- inline auto getApproximateTime() {
- #if defined(C10_RDTSC)
- return static_cast<uint64_t>(__rdtsc());
- #else
- return getTime();
- #endif
- }
- using approx_time_t = decltype(getApproximateTime());
- static_assert(
- std::is_same_v<approx_time_t, int64_t> ||
- std::is_same_v<approx_time_t, uint64_t>,
- "Expected either int64_t (`getTime`) or uint64_t (some TSC reads).");
- // Convert `getCount` results to Nanoseconds since unix epoch.
- class C10_API ApproximateClockToUnixTimeConverter final {
- public:
- ApproximateClockToUnixTimeConverter();
- std::function<time_t(approx_time_t)> makeConverter();
- struct UnixAndApproximateTimePair {
- time_t t_;
- approx_time_t approx_t_;
- };
- static UnixAndApproximateTimePair measurePair();
- private:
- static constexpr size_t replicates = 1001;
- using time_pairs = std::array<UnixAndApproximateTimePair, replicates>;
- time_pairs measurePairs();
- time_pairs start_times_;
- };
- } // namespace c10
|