Commit 4db1f475 authored by yurys@chromium.org's avatar yurys@chromium.org

Support higher CPU profiler sampling rate on posix systems

A new flag is added that allows specifying the CPU profiler sampling rate in microseconds as a command line argument. It was tested to work fine with a 100us interval (currently it is 1ms). Default values are kept the same as in the current implementation. The new implementation is enabled only on POSIX platforms which use signals to collect samples. Other platforms that pause the thread being sampled are to follow.

SIGPROF signals are now sent on the profiler event processor thread to make sure that the processing thread does not fall far behind the sampling.

The patch is based on the previous one that was rolled out in r13851. The main difference is that the circular queue is not modified for now.

On Linux sampling for CPU profiler is initiated on the profiler event processor thread, other platforms to follow.

CPU profiler continues to use SamplingCircularQueue, we will probably replace it with a single sample buffer when Mac and Win ports support profiling on the event processing thread.

When --prof option is specified profiling is initiated either on the profiler event processor thread if CPU profiler is on or on the SignalSender thread as it used to be if no CPU profiles are being collected.

ProfilerEventsProcessor::ProcessEventsAndDoSample now waits in a tight loop, processing collected samples until sampling interval expires. To save CPU resources I'm planning to change that to use nanosleep as only one sample is expected in the queue at any point.

BUG=v8:2814
R=bmeurer@chromium.org

Review URL: https://codereview.chromium.org/21101002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@16310 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent bb19c48f
...@@ -43,10 +43,15 @@ namespace internal { ...@@ -43,10 +43,15 @@ namespace internal {
static const int kProfilerStackSize = 64 * KB; static const int kProfilerStackSize = 64 * KB;
ProfilerEventsProcessor::ProfilerEventsProcessor(ProfileGenerator* generator) ProfilerEventsProcessor::ProfilerEventsProcessor(
ProfileGenerator* generator,
Sampler* sampler,
int period_in_useconds)
: Thread(Thread::Options("v8:ProfEvntProc", kProfilerStackSize)), : Thread(Thread::Options("v8:ProfEvntProc", kProfilerStackSize)),
generator_(generator), generator_(generator),
sampler_(sampler),
running_(true), running_(true),
period_in_useconds_(period_in_useconds),
last_code_event_id_(0), last_processed_code_event_id_(0) { last_code_event_id_(0), last_processed_code_event_id_(0) {
} }
...@@ -118,15 +123,39 @@ bool ProfilerEventsProcessor::ProcessTicks() { ...@@ -118,15 +123,39 @@ bool ProfilerEventsProcessor::ProcessTicks() {
} }
void ProfilerEventsProcessor::Run() { void ProfilerEventsProcessor::ProcessEventsAndDoSample() {
while (running_) { int64_t stop_time = OS::Ticks() + period_in_useconds_;
// Process ticks until we have any. // Keep processing existing events until we need to do next sample.
while (OS::Ticks() < stop_time) {
if (ProcessTicks()) { if (ProcessTicks()) {
// All ticks of the current last_processed_code_event_id_ are processed, // All ticks of the current dequeue_order are processed,
// proceed to the next code event. // proceed to the next code event.
ProcessCodeEvent(); ProcessCodeEvent();
} }
YieldCPU(); }
// Schedule next sample. sampler_ is NULL in tests.
if (sampler_) sampler_->DoSample();
}
// Processes queued tick samples and code events once, then yields the CPU.
// Used on platforms that cannot sample on this thread; the SamplerThread
// produces ticks independently while this loop consumes them.
void ProfilerEventsProcessor::ProcessEventsAndYield() {
  // ProcessTicks() returns true once every tick belonging to the current
  // code event has been consumed; only then may we advance to the next
  // code event.
  const bool ticks_drained = ProcessTicks();
  if (ticks_drained) {
    ProcessCodeEvent();
  }
  // Let other threads (notably the sampler) make progress before the next
  // iteration of Run().
  YieldCPU();
}
void ProfilerEventsProcessor::Run() {
while (running_) {
if (Sampler::CanSampleOnProfilerEventsProcessorThread()) {
ProcessEventsAndDoSample();
} else {
ProcessEventsAndYield();
}
} }
// Process remaining tick events. // Process remaining tick events.
...@@ -403,7 +432,9 @@ void CpuProfiler::StartProcessorIfNotStarted() { ...@@ -403,7 +432,9 @@ void CpuProfiler::StartProcessorIfNotStarted() {
saved_logging_nesting_ = logger->logging_nesting_; saved_logging_nesting_ = logger->logging_nesting_;
logger->logging_nesting_ = 0; logger->logging_nesting_ = 0;
generator_ = new ProfileGenerator(profiles_); generator_ = new ProfileGenerator(profiles_);
processor_ = new ProfilerEventsProcessor(generator_); Sampler* sampler = logger->sampler();
processor_ = new ProfilerEventsProcessor(
generator_, sampler, FLAG_cpu_profiler_sampling_interval);
is_profiling_ = true; is_profiling_ = true;
processor_->StartSynchronously(); processor_->StartSynchronously();
// Enumerate stuff we already have in the heap. // Enumerate stuff we already have in the heap.
...@@ -415,7 +446,9 @@ void CpuProfiler::StartProcessorIfNotStarted() { ...@@ -415,7 +446,9 @@ void CpuProfiler::StartProcessorIfNotStarted() {
logger->LogAccessorCallbacks(); logger->LogAccessorCallbacks();
LogBuiltins(); LogBuiltins();
// Enable stack sampling. // Enable stack sampling.
Sampler* sampler = logger->sampler(); if (Sampler::CanSampleOnProfilerEventsProcessorThread()) {
sampler->SetHasProcessingThread(true);
}
sampler->IncreaseProfilingDepth(); sampler->IncreaseProfilingDepth();
if (!sampler->IsActive()) { if (!sampler->IsActive()) {
sampler->Start(); sampler->Start();
...@@ -453,16 +486,19 @@ void CpuProfiler::StopProcessor() { ...@@ -453,16 +486,19 @@ void CpuProfiler::StopProcessor() {
Logger* logger = isolate_->logger(); Logger* logger = isolate_->logger();
Sampler* sampler = reinterpret_cast<Sampler*>(logger->ticker_); Sampler* sampler = reinterpret_cast<Sampler*>(logger->ticker_);
sampler->DecreaseProfilingDepth(); sampler->DecreaseProfilingDepth();
if (need_to_stop_sampler_) {
sampler->Stop();
need_to_stop_sampler_ = false;
}
is_profiling_ = false; is_profiling_ = false;
processor_->StopSynchronously(); processor_->StopSynchronously();
delete processor_; delete processor_;
delete generator_; delete generator_;
processor_ = NULL; processor_ = NULL;
generator_ = NULL; generator_ = NULL;
if (Sampler::CanSampleOnProfilerEventsProcessorThread()) {
sampler->SetHasProcessingThread(false);
}
if (need_to_stop_sampler_) {
sampler->Stop();
need_to_stop_sampler_ = false;
}
logger->logging_nesting_ = saved_logging_nesting_; logger->logging_nesting_ = saved_logging_nesting_;
} }
......
...@@ -136,7 +136,9 @@ class CodeEventsContainer { ...@@ -136,7 +136,9 @@ class CodeEventsContainer {
// methods called by event producers: VM and stack sampler threads. // methods called by event producers: VM and stack sampler threads.
class ProfilerEventsProcessor : public Thread { class ProfilerEventsProcessor : public Thread {
public: public:
explicit ProfilerEventsProcessor(ProfileGenerator* generator); ProfilerEventsProcessor(ProfileGenerator* generator,
Sampler* sampler,
int period_in_useconds);
virtual ~ProfilerEventsProcessor() {} virtual ~ProfilerEventsProcessor() {}
// Thread control. // Thread control.
...@@ -160,8 +162,14 @@ class ProfilerEventsProcessor : public Thread { ...@@ -160,8 +162,14 @@ class ProfilerEventsProcessor : public Thread {
bool ProcessCodeEvent(); bool ProcessCodeEvent();
bool ProcessTicks(); bool ProcessTicks();
void ProcessEventsAndDoSample();
void ProcessEventsAndYield();
ProfileGenerator* generator_; ProfileGenerator* generator_;
Sampler* sampler_;
bool running_; bool running_;
// Sampling period in microseconds.
const int period_in_useconds_;
UnboundQueue<CodeEventsContainer> events_buffer_; UnboundQueue<CodeEventsContainer> events_buffer_;
static const size_t kTickSampleBufferSize = 1 * MB; static const size_t kTickSampleBufferSize = 1 * MB;
static const size_t kTickSampleQueueLength = static const size_t kTickSampleQueueLength =
......
...@@ -462,6 +462,18 @@ DEFINE_bool(compilation_cache, true, "enable compilation cache") ...@@ -462,6 +462,18 @@ DEFINE_bool(compilation_cache, true, "enable compilation cache")
DEFINE_bool(cache_prototype_transitions, true, "cache prototype transitions") DEFINE_bool(cache_prototype_transitions, true, "cache prototype transitions")
// cpu-profiler.cc
#if defined(ANDROID)
// Phones and tablets have processors that are much slower than desktop
// and laptop computers for which current heuristics are tuned.
#define DEFAULT_INTERVAL 5000
#else
#define DEFAULT_INTERVAL 1000
#endif
DEFINE_int(cpu_profiler_sampling_interval, DEFAULT_INTERVAL,
"CPU profiler sampling interval in microseconds")
#undef DEFAULT_INTERVAL
// debug.cc // debug.cc
DEFINE_bool(trace_debug_json, false, "trace debugging JSON request/response") DEFINE_bool(trace_debug_json, false, "trace debugging JSON request/response")
DEFINE_bool(trace_js_array_abuse, false, DEFINE_bool(trace_js_array_abuse, false,
......
...@@ -174,7 +174,8 @@ class PlatformDataCommon : public Malloced { ...@@ -174,7 +174,8 @@ class PlatformDataCommon : public Malloced {
class Sampler::PlatformData : public PlatformDataCommon { class Sampler::PlatformData : public PlatformDataCommon {
public: public:
PlatformData() : vm_tid_(pthread_self()) {} PlatformData() : vm_tid_(pthread_self()) {}
pthread_t vm_tid() const { return vm_tid_; }
void SendProfilingSignal() const;
private: private:
pthread_t vm_tid_; pthread_t vm_tid_;
...@@ -492,9 +493,7 @@ class SamplerThread : public Thread { ...@@ -492,9 +493,7 @@ class SamplerThread : public Thread {
#if defined(USE_SIGNALS) #if defined(USE_SIGNALS)
void SampleContext(Sampler* sampler) { void SampleContext(Sampler* sampler) {
if (!SignalHandler::Installed()) return; sampler->platform_data()->SendProfilingSignal();
pthread_t tid = sampler->platform_data()->vm_tid();
pthread_kill(tid, SIGPROF);
} }
#elif defined(__MACH__) #elif defined(__MACH__)
...@@ -607,6 +606,14 @@ Mutex* SamplerThread::mutex_ = NULL; ...@@ -607,6 +606,14 @@ Mutex* SamplerThread::mutex_ = NULL;
SamplerThread* SamplerThread::instance_ = NULL; SamplerThread* SamplerThread::instance_ = NULL;
#if defined(USE_SIGNALS)

// Delivers SIGPROF to the profiled VM thread so the installed signal
// handler captures a stack sample there. Does nothing when the handler
// has not been installed, since an unhandled SIGPROF would terminate
// the process.
void Sampler::PlatformData::SendProfilingSignal() const {
  if (SignalHandler::Installed()) {
    pthread_kill(vm_tid_, SIGPROF);
  }
}

#endif
// //
// StackTracer implementation // StackTracer implementation
// //
...@@ -665,6 +672,7 @@ Sampler::Sampler(Isolate* isolate, int interval) ...@@ -665,6 +672,7 @@ Sampler::Sampler(Isolate* isolate, int interval)
: isolate_(isolate), : isolate_(isolate),
interval_(interval), interval_(interval),
profiling_(false), profiling_(false),
has_processing_thread_(false),
active_(false), active_(false),
is_counting_samples_(false), is_counting_samples_(false),
js_and_external_sample_count_(0) { js_and_external_sample_count_(0) {
...@@ -708,4 +716,20 @@ void Sampler::SampleStack(const RegisterState& state) { ...@@ -708,4 +716,20 @@ void Sampler::SampleStack(const RegisterState& state) {
} }
} }
// Returns whether sampling can be initiated from the profiler event
// processor thread. Only signal-based (POSIX) ports support this; other
// ports pause the sampled thread from a dedicated sampler thread instead.
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
#ifdef USE_SIGNALS
  return true;
#else
  return false;
#endif
}
// Initiates collection of a single stack sample. On signal-based (POSIX)
// platforms this sends SIGPROF to the profiled thread via
// SendProfilingSignal(); on all other platforms it is currently a no-op.
void Sampler::DoSample() {
#if defined(USE_SIGNALS)
platform_data()->SendProfilingSignal();
#endif
}
} } // namespace v8::internal } } // namespace v8::internal
...@@ -94,14 +94,25 @@ class Sampler { ...@@ -94,14 +94,25 @@ class Sampler {
void Start(); void Start();
void Stop(); void Stop();
// Is the sampler used for profiling? // Whether the sampling thread should use this Sampler for CPU profiling?
bool IsProfiling() const { return NoBarrier_Load(&profiling_) > 0; } bool IsProfiling() const {
return NoBarrier_Load(&profiling_) > 0 &&
!NoBarrier_Load(&has_processing_thread_);
}
void IncreaseProfilingDepth() { NoBarrier_AtomicIncrement(&profiling_, 1); } void IncreaseProfilingDepth() { NoBarrier_AtomicIncrement(&profiling_, 1); }
void DecreaseProfilingDepth() { NoBarrier_AtomicIncrement(&profiling_, -1); } void DecreaseProfilingDepth() { NoBarrier_AtomicIncrement(&profiling_, -1); }
// Whether the sampler is running (that is, consumes resources). // Whether the sampler is running (that is, consumes resources).
bool IsActive() const { return NoBarrier_Load(&active_); } bool IsActive() const { return NoBarrier_Load(&active_); }
// If true next sample must be initiated on the profiler event processor
// thread right after latest sample is processed.
static bool CanSampleOnProfilerEventsProcessorThread();
void DoSample();
void SetHasProcessingThread(bool value) {
NoBarrier_Store(&has_processing_thread_, value);
}
// Used in tests to make sure that stack sampling is performed. // Used in tests to make sure that stack sampling is performed.
unsigned js_and_external_sample_count() const { unsigned js_and_external_sample_count() const {
return js_and_external_sample_count_; return js_and_external_sample_count_;
...@@ -125,6 +136,7 @@ class Sampler { ...@@ -125,6 +136,7 @@ class Sampler {
Isolate* isolate_; Isolate* isolate_;
const int interval_; const int interval_;
Atomic32 profiling_; Atomic32 profiling_;
Atomic32 has_processing_thread_;
Atomic32 active_; Atomic32 active_;
PlatformData* data_; // Platform specific data. PlatformData* data_; // Platform specific data.
bool is_counting_samples_; bool is_counting_samples_;
......
...@@ -51,7 +51,7 @@ TEST(StartStop) { ...@@ -51,7 +51,7 @@ TEST(StartStop) {
CpuProfilesCollection profiles; CpuProfilesCollection profiles;
ProfileGenerator generator(&profiles); ProfileGenerator generator(&profiles);
SmartPointer<ProfilerEventsProcessor> processor( SmartPointer<ProfilerEventsProcessor> processor(
new ProfilerEventsProcessor(&generator)); new ProfilerEventsProcessor(&generator, NULL, 100));
processor->Start(); processor->Start();
processor->StopSynchronously(); processor->StopSynchronously();
} }
...@@ -143,7 +143,7 @@ TEST(CodeEvents) { ...@@ -143,7 +143,7 @@ TEST(CodeEvents) {
profiles->StartProfiling("", 1, false); profiles->StartProfiling("", 1, false);
ProfileGenerator generator(profiles); ProfileGenerator generator(profiles);
SmartPointer<ProfilerEventsProcessor> processor( SmartPointer<ProfilerEventsProcessor> processor(
new ProfilerEventsProcessor(&generator)); new ProfilerEventsProcessor(&generator, NULL, 100));
processor->Start(); processor->Start();
CpuProfiler profiler(isolate, profiles, &generator, *processor); CpuProfiler profiler(isolate, profiles, &generator, *processor);
...@@ -205,7 +205,7 @@ TEST(TickEvents) { ...@@ -205,7 +205,7 @@ TEST(TickEvents) {
profiles->StartProfiling("", 1, false); profiles->StartProfiling("", 1, false);
ProfileGenerator generator(profiles); ProfileGenerator generator(profiles);
SmartPointer<ProfilerEventsProcessor> processor( SmartPointer<ProfilerEventsProcessor> processor(
new ProfilerEventsProcessor(&generator)); new ProfilerEventsProcessor(&generator, NULL, 100));
processor->Start(); processor->Start();
CpuProfiler profiler(isolate, profiles, &generator, *processor); CpuProfiler profiler(isolate, profiles, &generator, *processor);
...@@ -274,7 +274,7 @@ TEST(Issue1398) { ...@@ -274,7 +274,7 @@ TEST(Issue1398) {
profiles->StartProfiling("", 1, false); profiles->StartProfiling("", 1, false);
ProfileGenerator generator(profiles); ProfileGenerator generator(profiles);
SmartPointer<ProfilerEventsProcessor> processor( SmartPointer<ProfilerEventsProcessor> processor(
new ProfilerEventsProcessor(&generator)); new ProfilerEventsProcessor(&generator, NULL, 100));
processor->Start(); processor->Start();
CpuProfiler profiler(isolate, profiles, &generator, *processor); CpuProfiler profiler(isolate, profiles, &generator, *processor);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment