Commit ee43805a authored by lpy's avatar lpy Committed by Commit bot

[base] Implement CPU time on Windows.

We already implemented CPU time for OS X and POSIX; this patch is a
follow-up that adds the implementation for Windows.

BUG=v8:5000
LOG=n

Review-Url: https://codereview.chromium.org/1977983003
Cr-Commit-Position: refs/heads/master@{#36656}
parent a7d091ff
......@@ -338,7 +338,8 @@ CPU::CPU()
has_vfp_(false),
has_vfp3_(false),
has_vfp3_d32_(false),
is_fp64_mode_(false) {
is_fp64_mode_(false),
has_non_stop_time_stamp_counter_(false) {
memcpy(vendor_, "Unknown", 8);
#if V8_OS_NACL
// Portable host shouldn't do feature detection.
......@@ -419,6 +420,13 @@ CPU::CPU()
has_sahf_ = (cpu_info[2] & 0x00000001) != 0;
}
// Check if CPU has non stoppable time stamp counter.
const int parameter_containing_non_stop_time_stamp_counter = 0x80000007;
if (num_ext_ids >= parameter_containing_non_stop_time_stamp_counter) {
__cpuid(cpu_info, parameter_containing_non_stop_time_stamp_counter);
has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
}
#elif V8_HOST_ARCH_ARM
#if V8_OS_LINUX
......
......@@ -97,6 +97,9 @@ class CPU final {
bool has_lzcnt() const { return has_lzcnt_; }
bool has_popcnt() const { return has_popcnt_; }
bool is_atom() const { return is_atom_; }
// Returns the cached flag recording whether a non-stop time stamp counter
// was detected for this CPU.
bool has_non_stop_time_stamp_counter() const {
return has_non_stop_time_stamp_counter_;
}
// arm features
bool has_idiva() const { return has_idiva_; }
......@@ -148,6 +151,7 @@ class CPU final {
bool has_vfp3_;
bool has_vfp3_d32_;
bool is_fp64_mode_;
bool has_non_stop_time_stamp_counter_;
};
} // namespace base
......
......@@ -53,7 +53,7 @@ int64_t ComputeThreadTicks() {
// microsecond timebase. Minimum requirement is MONOTONIC_CLOCK to be supported
// on the system. FreeBSD 6 has CLOCK_MONOTONIC but defines
// _POSIX_MONOTONIC_CLOCK to -1.
inline int64_t ClockNow(clockid_t clk_id) {
V8_INLINE int64_t ClockNow(clockid_t clk_id) {
#if (defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0) || \
defined(V8_OS_BSD) || defined(V8_OS_ANDROID)
struct timespec ts;
......@@ -69,6 +69,24 @@ inline int64_t ClockNow(clockid_t clk_id) {
return 0;
#endif
}
#elif V8_OS_WIN
// Detects the CPU family on which QueryPerformanceCounter is known to be
// unreliable (Athlon X2, e.g. model 15).
//
// NOTE(review): despite its name, this returns true when the CPU IS one of
// the affected AMD parts, i.e. when QPC is NOT reliable. The callers in this
// file depend on that polarity (a true result selects the low-resolution
// fallback clock), so any rename or inversion has to update them together.
V8_INLINE bool IsQPCReliable() {
  v8::base::CPU cpu;
  const bool vendor_is_amd = strcmp(cpu.vendor(), "AuthenticAMD") == 0;
  return vendor_is_amd && cpu.family() == 15;
}
// Returns the current value of the performance counter.
// Reads the Windows performance counter and returns its raw tick value
// (not converted to any time unit).
V8_INLINE uint64_t QPCNowRaw() {
  LARGE_INTEGER counter = {};
  // The MSDN documentation for QueryPerformanceCounter() states that the call
  // never fails on systems that run XP or later, so a failure is only
  // checked in debug builds.
  // https://msdn.microsoft.com/library/windows/desktop/ms644904.aspx
  BOOL succeeded = ::QueryPerformanceCounter(&counter);
  DCHECK(succeeded);
  USE(succeeded);
  return static_cast<uint64_t>(counter.QuadPart);
}
#endif // V8_OS_MACOSX
......@@ -458,15 +476,12 @@ class HighResolutionTickClock final : public TickClock {
virtual ~HighResolutionTickClock() {}
int64_t Now() override {
LARGE_INTEGER now;
BOOL result = QueryPerformanceCounter(&now);
DCHECK(result);
USE(result);
uint64_t now = QPCNowRaw();
// Intentionally calculate microseconds in a round about manner to avoid
// overflow and precision issues. Think twice before simplifying!
int64_t whole_seconds = now.QuadPart / ticks_per_second_;
int64_t leftover_ticks = now.QuadPart % ticks_per_second_;
int64_t whole_seconds = now / ticks_per_second_;
int64_t leftover_ticks = now % ticks_per_second_;
int64_t ticks = (whole_seconds * Time::kMicrosecondsPerSecond) +
((leftover_ticks * Time::kMicrosecondsPerSecond) / ticks_per_second_);
......@@ -531,10 +546,8 @@ struct CreateHighResTickClockTrait {
return tick_clock.Pointer();
}
// On Athlon X2 CPUs (e.g. model 15) the QueryPerformanceCounter
// is unreliable, fallback to the low-resolution tick clock.
CPU cpu;
if (strcmp(cpu.vendor(), "AuthenticAMD") == 0 && cpu.family() == 15) {
// If QPC not reliable, fallback to low-resolution tick clock.
if (IsQPCReliable()) {
return tick_clock.Pointer();
}
......@@ -623,11 +636,106 @@ ThreadTicks ThreadTicks::Now() {
#elif(defined(_POSIX_THREAD_CPUTIME) && (_POSIX_THREAD_CPUTIME >= 0)) || \
defined(V8_OS_ANDROID)
return ThreadTicks(ClockNow(CLOCK_THREAD_CPUTIME_ID));
#elif V8_OS_WIN
return ThreadTicks::GetForThread(::GetCurrentThread());
#else
UNREACHABLE();
return ThreadTicks();
#endif
}
#if V8_OS_WIN
// Returns the CPU time consumed by the thread identified by |thread_handle|,
// expressed in microseconds. Returns an empty ThreadTicks while the TSC
// frequency has not been measured yet (TSCTicksPerSecond() yields 0).
ThreadTicks ThreadTicks::GetForThread(const HANDLE& thread_handle) {
  DCHECK(IsSupported());

  // Number of TSC ticks used so far by the thread behind |thread_handle|.
  ULONG64 cycles_used = 0;
  ::QueryThreadCycleTime(thread_handle, &cycles_used);

  // Frequency of the TSC; zero means it is not known yet.
  const double ticks_per_second = TSCTicksPerSecond();
  if (ticks_per_second == 0) return ThreadTicks();

  // Convert cycles -> seconds -> microseconds.
  const double cpu_seconds = cycles_used / ticks_per_second;
  const double cpu_microseconds = cpu_seconds * Time::kMicrosecondsPerSecond;
  return ThreadTicks(static_cast<int64_t>(cpu_microseconds));
}
// static
// Reports whether thread CPU time can be measured on this machine. The
// answer is computed on the first call and cached in a function-local static.
// Support requires a non-stop time stamp counter and a CPU that is not the
// Athlon X2 family flagged by IsQPCReliable() (which returns true for those
// affected CPUs, hence the negation).
bool ThreadTicks::IsSupportedWin() {
  static bool is_supported = []() {
    if (!base::CPU().has_non_stop_time_stamp_counter()) return false;
    return !IsQPCReliable();
  }();
  return is_supported;
}
// static
// Blocks the calling thread until TSCTicksPerSecond() reports a non-zero
// frequency, polling in 10 ms sleeps.
void ThreadTicks::WaitUntilInitializedWin() {
  for (;;) {
    if (TSCTicksPerSecond() != 0) return;
    ::Sleep(10);
  }
}
// Returns the measured frequency of the TSC in ticks per second, or 0 while
// the measurement is still in progress. The frequency is derived by comparing
// how far the TSC and the performance counter advanced between the first call
// to this function and the current one; at least 50 ms must separate the two
// readings before a result is produced. Once computed, the value is cached
// and returned directly on later calls.
// NOTE(review): the cache is a plain function-local static written without
// any visible synchronization -- confirm whether concurrent callers matter.
double ThreadTicks::TSCTicksPerSecond() {
DCHECK(IsSupported());
// The value returned by QueryPerformanceFrequency() cannot be used as the TSC
// frequency, because there is no guarantee that the TSC frequency is equal to
// the performance counter frequency.
// The TSC frequency is cached in a static variable because it takes some time
// to compute it.
static double tsc_ticks_per_second = 0;
if (tsc_ticks_per_second != 0)
return tsc_ticks_per_second;
// Increase the thread priority to reduce the chances of having a context
// switch during a reading of the TSC and the performance counter.
int previous_priority = ::GetThreadPriority(::GetCurrentThread());
::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
// The first time that this function is called, make an initial reading of the
// TSC and the performance counter. Being function-local statics, these are
// initialized only once and keep that first reading on every later call.
static const uint64_t tsc_initial = __rdtsc();
static const uint64_t perf_counter_initial = QPCNowRaw();
// Make another reading of the TSC and the performance counter every time
// that this function is called.
uint64_t tsc_now = __rdtsc();
uint64_t perf_counter_now = QPCNowRaw();
// Reset the thread priority.
::SetThreadPriority(::GetCurrentThread(), previous_priority);
// Make sure that at least 50 ms elapsed between the 2 readings. The first
// time that this function is called, we don't expect this to be the case.
// Note: The longer the elapsed time between the 2 readings is, the more
// accurate the computed TSC frequency will be. The 50 ms value was
// chosen because local benchmarks show that it allows us to get a
// stddev of less than 1 tick/us between multiple runs.
// Note: According to the MSDN documentation for QueryPerformanceFrequency(),
// this will never fail on systems that run XP or later.
// https://msdn.microsoft.com/library/windows/desktop/ms644905.aspx
LARGE_INTEGER perf_counter_frequency = {};
::QueryPerformanceFrequency(&perf_counter_frequency);
DCHECK_GE(perf_counter_now, perf_counter_initial);
uint64_t perf_counter_ticks = perf_counter_now - perf_counter_initial;
// Elapsed wall time between the initial and the current reading, in seconds.
double elapsed_time_seconds =
perf_counter_ticks / static_cast<double>(perf_counter_frequency.QuadPart);
const double kMinimumEvaluationPeriodSeconds = 0.05;
// Too early to compute a result: report "not measured yet" and leave the
// cache untouched so that a later call retries.
if (elapsed_time_seconds < kMinimumEvaluationPeriodSeconds)
return 0;
// Compute the frequency of the TSC.
DCHECK_GE(tsc_now, tsc_initial);
uint64_t tsc_ticks = tsc_now - tsc_initial;
tsc_ticks_per_second = tsc_ticks / elapsed_time_seconds;
return tsc_ticks_per_second;
}
#endif // V8_OS_WIN
} // namespace base
} // namespace v8
......@@ -12,6 +12,9 @@
#include "src/base/bits.h"
#include "src/base/macros.h"
#include "src/base/safe_math.h"
#if V8_OS_WIN
#include "src/base/win32-headers.h"
#endif
// Forward declarations.
extern "C" {
......@@ -380,17 +383,42 @@ class ThreadTicks final : public time_internal::TimeBase<ThreadTicks> {
// Returns true if ThreadTicks::Now() is supported on this system.
static bool IsSupported();
// Waits until the initialization is completed. Needs to be guarded with a
// call to IsSupported(). On Windows this forwards to
// WaitUntilInitializedWin(); on every other platform the body is empty and
// the call returns immediately.
static void WaitUntilInitialized() {
#if V8_OS_WIN
WaitUntilInitializedWin();
#endif
}
// Returns thread-specific CPU-time on systems that support this feature.
// Needs to be guarded with a call to IsSupported(). Use this timer
// to (approximately) measure how much time the calling thread spent doing
// actual work vs. being de-scheduled. May return bogus results if the thread
// migrates to another CPU between two calls. Returns an empty ThreadTicks
// object until the initialization is completed.
// object until the initialization is completed. If a clock reading is
// absolutely needed, call WaitUntilInitialized() before this method.
static ThreadTicks Now();
#if V8_OS_WIN
// Similar to Now() above except this returns thread-specific CPU time for an
// arbitrary thread. All comments for the Now() method above apply to this
// method as well.
static ThreadTicks GetForThread(const HANDLE& thread_handle);
#endif
private:
// This is for internal use and testing. Ticks are in microseconds.
// Please use Now() or GetForThread() to create a new object. This is for
// internal use and testing. Ticks are in microseconds.
explicit ThreadTicks(int64_t ticks) : TimeBase(ticks) {}
#if V8_OS_WIN
// Returns the frequency of the TSC in ticks per second, or 0 if it hasn't
// been measured yet. Needs to be guarded with a call to IsSupported().
static double TSCTicksPerSecond();
static bool IsSupportedWin();
static void WaitUntilInitializedWin();
#endif
};
} // namespace base
......
......@@ -27,10 +27,10 @@
#ifndef NOMCX
#define NOMCX
#endif
// Require Windows XP or higher (this is required for the RtlCaptureContext
// function to be present).
// Require Windows Vista or higher (this is required for the
// QueryThreadCycleTime function to be present).
#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x501
#define _WIN32_WINNT 0x0600
#endif
#include <windows.h>
......
......@@ -15,6 +15,8 @@
#include "src/base/win32-headers.h"
#endif
#include <vector>
#include "src/base/platform/elapsed-timer.h"
#include "src/base/platform/platform.h"
#include "testing/gtest/include/gtest/gtest.h"
......@@ -185,7 +187,7 @@ TEST(TimeTicks, IsMonotonic) {
// Disable on windows until it is implemented.
#if V8_OS_ANDROID || V8_OS_WIN
#if V8_OS_ANDROID
#define MAYBE_ThreadNow DISABLED_ThreadNow
#else
#define MAYBE_ThreadNow ThreadNow
......@@ -210,5 +212,50 @@ TEST(ThreadTicks, MAYBE_ThreadNow) {
}
}
#if V8_OS_WIN
// Measures and prints the average cost of one call to each clock source
// (Time::Now, TimeTicks::Now and, when supported, ThreadTicks::Now).
TEST(TimeTicks, TimerPerformance) {
  // Verify that various timer mechanisms can always complete quickly.
  // Note: This is a somewhat arbitrary test.
  const int kLoops = 10000;

  // The clocks return different types (Time, TimeTicks, ThreadTicks). Calling
  // them through a pointer reinterpret_cast to a single function type is
  // undefined behavior, so each one is wrapped in a captureless lambda that
  // shares a common void() signature and discards the result.
  typedef void (*TestFunc)();
  struct TestCase {
    TestFunc func;
    const char* description;
  };

  std::vector<TestCase> cases;
  cases.push_back({[]() { Time::Now(); }, "Time::Now"});
  cases.push_back({[]() { TimeTicks::Now(); }, "TimeTicks::Now"});

  if (ThreadTicks::IsSupported()) {
    // ThreadTicks::Now() returns an empty value until the TSC frequency has
    // been measured, so wait for that before timing it.
    ThreadTicks::WaitUntilInitialized();
    cases.push_back({[]() { ThreadTicks::Now(); }, "ThreadTicks::Now"});
  }

  for (const auto& test_case : cases) {
    TimeTicks start = TimeTicks::Now();
    for (int index = 0; index < kLoops; index++) test_case.func();
    TimeTicks stop = TimeTicks::Now();
    // Turning off the check for acceptable delays. Without this check,
    // the test really doesn't do much other than measure. But the
    // measurements are still useful for testing timers on various platforms.
    // The reason to remove the check is because the tests run on many
    // buildbots, some of which are VMs. These machines can run horribly
    // slow, and there is really no value for checking against a max timer.
    // const int kMaxTime = 35;  // Maximum acceptable milliseconds for test.
    // EXPECT_LT((stop - start).InMilliseconds(), kMaxTime);
    printf("%s: %1.2fus per call\n", test_case.description,
           (stop - start).InMillisecondsF() * 1000 / kLoops);
  }
}
#endif // V8_OS_WIN
} // namespace base
} // namespace v8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment