Commit dc9b8176 authored by yurys@chromium.org

Send SIGPROF signals on the profiler event processor thread

The patch is based on the previous one that was rolled out: https://code.google.com/p/v8/source/detail?r=12985

On Linux, sampling for the CPU profiler is now initiated on the profiler event processor thread; other platforms will follow.
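
To make the mechanism concrete: on Linux a signal can be directed at one specific thread with tgkill(2), and the sample is then taken inside that thread's SIGPROF handler. A minimal standalone sketch of that pattern follows (not V8 code; error handling omitted, and for brevity the signal is sent to the current thread):

#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
  // V8's handler reads the interrupted thread's registers out of `context`
  // and enqueues a TickSample; only async-signal-safe work is allowed here.
}

int main() {
  struct sigaction sa;
  sa.sa_sigaction = ProfilerSignalHandler;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_RESTART | SA_SIGINFO;
  sigaction(SIGPROF, &sa, NULL);

  int tgid = getpid();
  int tid = syscall(SYS_gettid);  // thread to be sampled
  // Glibc provides no tgkill() wrapper, hence the raw syscall.
  syscall(SYS_tgkill, tgid, tid, SIGPROF);
  return 0;
}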

The CPU profiler continues to use SamplingCircularQueue; we will replace it with a single sample buffer once the Mac and Windows ports support profiling on the event processor thread.

When the --prof option is specified, profiling is initiated either on the profiler event processor thread, if the CPU profiler is on, or on the SignalSender thread, as before, if no CPU profiles are being collected.
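
For example, with a d8 shell built from this branch one might run (hypothetical invocation; script.js is a placeholder):

d8 --prof --cpu_profiler_sampling_period=500 script.js

Note that the new period flag only takes effect while CPU profiles are actually being collected; with --prof alone, sampling stays on the SignalSender thread as described above.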

ProfilerEventsProcessor::ProcessEventsAndDoSample now waits in a tight loop, processing collected samples, until the sampling interval expires. To save CPU resources I plan to change that to use nanosleep, since only one sample is expected in the queue at any point.
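
A minimal sketch of that planned variant (hypothetical: ProcessEventsAndSleep is not part of this patch; OS::Ticks() returns microseconds, and nanosleep() requires <time.h> and is POSIX-only):

void ProfilerEventsProcessor::ProcessEventsAndSleep(unsigned* dequeue_order) {
  int64_t stop_time = OS::Ticks() + period_in_useconds_;
  // Drain pending events once; at most one tick sample is expected in the
  // queue per sampling period.
  if (ProcessTicks(*dequeue_order)) {
    ProcessCodeEvent(dequeue_order);
  }
  // Sleep away the remainder of the period instead of spinning.
  int64_t left_us = stop_time - OS::Ticks();
  if (left_us > 0) {
    struct timespec ts;
    ts.tv_sec = static_cast<time_t>(left_us / 1000000);
    ts.tv_nsec = static_cast<long>((left_us % 1000000) * 1000);
    nanosleep(&ts, NULL);
  }
  // Schedule the next sample; sampler_ is NULL in tests.
  if (sampler_) sampler_->DoSample();
}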

BUG=v8:2364

Review URL: https://codereview.chromium.org/12321046

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13735 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 0184fe51
......@@ -33,18 +33,16 @@ namespace v8 {
namespace internal {
SamplingCircularQueue::SamplingCircularQueue(int record_size_in_bytes,
int desired_chunk_size_in_bytes,
int buffer_size_in_chunks)
SamplingCircularQueue::SamplingCircularQueue(
int record_size_in_bytes,
int desired_chunk_size_in_bytes,
int buffer_size_in_chunks,
bool keep_producer_consumer_distance)
: record_size_(record_size_in_bytes / sizeof(Cell)),
chunk_size_in_bytes_(desired_chunk_size_in_bytes / record_size_in_bytes *
record_size_in_bytes),
chunk_size_(chunk_size_in_bytes_ / sizeof(Cell)),
buffer_size_(chunk_size_ * buffer_size_in_chunks),
// The distance ensures that producer and consumer never step on
// each other's chunks and helps eviction of produced data from
// the CPU cache (provided that the chunk size is bigger than the cache).
producer_consumer_distance_(2 * chunk_size_),
buffer_(NewArray<Cell>(buffer_size_ + 1)) {
ASSERT(buffer_size_in_chunks > 2);
// Clean up the whole buffer to avoid encountering a random kEnd
......@@ -74,7 +72,13 @@ SamplingCircularQueue::SamplingCircularQueue(int record_size_in_bytes,
ASSERT(reinterpret_cast<byte*>(consumer_pos_ + 1) <=
positions_ + positions_size);
consumer_pos_->dequeue_chunk_pos = buffer_;
consumer_pos_->dequeue_chunk_poll_pos = buffer_ + producer_consumer_distance_;
consumer_pos_->dequeue_chunk_poll_pos = buffer_;
// The distance ensures that producer and consumer never step on
// each other's chunks and helps eviction of produced data from
// the CPU cache (provided that the chunk size is bigger than the cache).
if (keep_producer_consumer_distance) {
consumer_pos_->dequeue_chunk_poll_pos += 2 * chunk_size_;
}
consumer_pos_->dequeue_pos = NULL;
}
......
......@@ -47,7 +47,8 @@ class SamplingCircularQueue {
// Executed on the application thread.
SamplingCircularQueue(int record_size_in_bytes,
int desired_chunk_size_in_bytes,
int buffer_size_in_chunks);
int buffer_size_in_chunks,
bool keep_producer_consumer_distance = true);
~SamplingCircularQueue();
// Enqueue returns a pointer to a memory location for storing the next
......@@ -88,7 +89,6 @@ class SamplingCircularQueue {
const int chunk_size_in_bytes_;
const int chunk_size_;
const int buffer_size_;
const int producer_consumer_distance_;
Cell* buffer_;
byte* positions_;
ProducerPosition* producer_pos_;
......
......@@ -45,13 +45,18 @@ static const int kTickSamplesBufferChunksCount = 16;
static const int kProfilerStackSize = 64 * KB;
ProfilerEventsProcessor::ProfilerEventsProcessor(ProfileGenerator* generator)
ProfilerEventsProcessor::ProfilerEventsProcessor(ProfileGenerator* generator,
Sampler* sampler,
int period_in_useconds)
: Thread(Thread::Options("v8:ProfEvntProc", kProfilerStackSize)),
generator_(generator),
sampler_(sampler),
running_(true),
period_in_useconds_(period_in_useconds),
ticks_buffer_(sizeof(TickSampleEventRecord),
kTickSamplesBufferChunkSize,
kTickSamplesBufferChunksCount),
kTickSamplesBufferChunksCount,
!Sampler::CanSampleOnProfilerEventsProcessorThread()),
enqueue_order_(0) {
}
......@@ -239,17 +244,42 @@ bool ProfilerEventsProcessor::ProcessTicks(unsigned dequeue_order) {
}
void ProfilerEventsProcessor::ProcessEventsAndDoSample(
unsigned* dequeue_order) {
int64_t stop_time = OS::Ticks() + period_in_useconds_;
// Keep processing existing events until we need to take the next sample.
while (OS::Ticks() < stop_time) {
if (ProcessTicks(*dequeue_order)) {
// All ticks of the current dequeue_order are processed,
// proceed to the next code event.
ProcessCodeEvent(dequeue_order);
}
}
// Schedule next sample. sampler_ is NULL in tests.
if (sampler_)
sampler_->DoSample();
}
void ProfilerEventsProcessor::ProcessEventsAndYield(unsigned* dequeue_order) {
if (ProcessTicks(*dequeue_order)) {
// All ticks of the current dequeue_order are processed,
// proceed to the next code event.
ProcessCodeEvent(dequeue_order);
}
YieldCPU();
}
void ProfilerEventsProcessor::Run() {
unsigned dequeue_order = 0;
while (running_) {
// Process ticks while we have any.
if (ProcessTicks(dequeue_order)) {
// All ticks of the current dequeue_order are processed,
// proceed to the next code event.
ProcessCodeEvent(&dequeue_order);
if (Sampler::CanSampleOnProfilerEventsProcessorThread()) {
ProcessEventsAndDoSample(&dequeue_order);
} else {
ProcessEventsAndYield(&dequeue_order);
}
YieldCPU();
}
// Process remaining tick events.
......@@ -486,13 +516,15 @@ void CpuProfiler::StartProcessorIfNotStarted() {
if (processor_ == NULL) {
Isolate* isolate = Isolate::Current();
Sampler* sampler = reinterpret_cast<Sampler*>(isolate->logger()->ticker_);
// Disable logging when using the new implementation.
saved_logging_nesting_ = isolate->logger()->logging_nesting_;
isolate->logger()->logging_nesting_ = 0;
generator_ = new ProfileGenerator(profiles_);
processor_ = new ProfilerEventsProcessor(generator_);
processor_ = new ProfilerEventsProcessor(generator_,
sampler,
FLAG_cpu_profiler_sampling_period);
is_profiling_ = true;
processor_->Start();
// Enumerate stuff we already have in the heap.
if (isolate->heap()->HasBeenSetUp()) {
if (!FLAG_prof_browser_mode) {
......@@ -505,12 +537,13 @@ void CpuProfiler::StartProcessorIfNotStarted() {
isolate->logger()->LogAccessorCallbacks();
}
// Enable stack sampling.
Sampler* sampler = reinterpret_cast<Sampler*>(isolate->logger()->ticker_);
if (!sampler->IsActive()) {
sampler->Start();
need_to_stop_sampler_ = true;
}
sampler->SetHasProcessingThread(true);
sampler->IncreaseProfilingDepth();
processor_->Start();
}
}
......@@ -548,6 +581,7 @@ void CpuProfiler::StopProcessor() {
Logger* logger = Isolate::Current()->logger();
Sampler* sampler = reinterpret_cast<Sampler*>(logger->ticker_);
sampler->DecreaseProfilingDepth();
sampler->SetHasProcessingThread(false);
if (need_to_stop_sampler_) {
sampler->Stop();
need_to_stop_sampler_ = false;
......
......@@ -124,7 +124,9 @@ class TickSampleEventRecord {
// methods called by event producers: VM and stack sampler threads.
class ProfilerEventsProcessor : public Thread {
public:
explicit ProfilerEventsProcessor(ProfileGenerator* generator);
ProfilerEventsProcessor(ProfileGenerator* generator,
Sampler* sampler,
int period_in_useconds);
virtual ~ProfilerEventsProcessor() {}
// Thread control.
......@@ -173,11 +175,16 @@ class ProfilerEventsProcessor : public Thread {
// Called from events processing thread (Run() method.)
bool ProcessCodeEvent(unsigned* dequeue_order);
bool ProcessTicks(unsigned dequeue_order);
void ProcessEventsAndDoSample(unsigned* dequeue_order);
void ProcessEventsAndYield(unsigned* dequeue_order);
INLINE(static bool FilterOutCodeCreateEvent(Logger::LogEventsAndTags tag));
ProfileGenerator* generator_;
Sampler* sampler_;
bool running_;
// Sampling period in microseconds.
const int period_in_useconds_;
UnboundQueue<CodeEventsContainer> events_buffer_;
SamplingCircularQueue ticks_buffer_;
UnboundQueue<TickSampleEventRecord> ticks_from_vm_buffer_;
......
......@@ -365,6 +365,10 @@ DEFINE_bool(compilation_cache, true, "enable compilation cache")
DEFINE_bool(cache_prototype_transitions, true, "cache prototype transitions")
// cpu-profiler.cc
DEFINE_int(cpu_profiler_sampling_period, 1000,
"CPU profiler sampling period in microseconds")
// debug.cc
DEFINE_bool(trace_debug_json, false, "trace debugging JSON request/response")
DEFINE_bool(trace_js_array_abuse, false,
......
......@@ -776,4 +776,21 @@ void Sampler::Stop() {
}
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
return false;
}
void Sampler::DoSample() {
}
void Sampler::StartProfiling() {
}
void Sampler::StopProfiling() {
}
} } // namespace v8::internal
......@@ -898,4 +898,21 @@ void Sampler::Stop() {
}
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
return false;
}
void Sampler::DoSample() {
}
void Sampler::StartProfiling() {
}
void Sampler::StopProfiling() {
}
} } // namespace v8::internal
......@@ -1127,29 +1127,27 @@ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
}
class Sampler::PlatformData : public Malloced {
class CpuProfilerSignalHandler {
public:
PlatformData() : vm_tid_(GetThreadID()) {}
int vm_tid() const { return vm_tid_; }
private:
const int vm_tid_;
};
class SignalSender : public Thread {
public:
static const int kSignalSenderStackSize = 64 * KB;
explicit SignalSender(int interval)
: Thread(Thread::Options("SignalSender", kSignalSenderStackSize)),
vm_tgid_(getpid()),
interval_(interval) {}
static void SetUp() { if (!mutex_) mutex_ = OS::CreateMutex(); }
static void TearDown() { delete mutex_; }
static bool RegisterProfilingSampler() {
ScopedLock lock(mutex_);
if (!profiling_samplers_count_) InstallSignalHandler();
++profiling_samplers_count_;
return signal_handler_installed_;
}
static void UnregisterProfilingSampler() {
ScopedLock lock(mutex_);
ASSERT(profiling_samplers_count_ > 0);
if (!profiling_samplers_count_) return;
if (profiling_samplers_count_ == 1) RestoreSignalHandler();
--profiling_samplers_count_;
}
private:
static void InstallSignalHandler() {
struct sigaction sa;
sa.sa_sigaction = ProfilerSignalHandler;
......@@ -1166,6 +1164,61 @@ class SignalSender : public Thread {
}
}
// Protects the process wide state below.
static Mutex* mutex_;
static int profiling_samplers_count_;
static bool signal_handler_installed_;
static struct sigaction old_signal_handler_;
};
Mutex* CpuProfilerSignalHandler::mutex_ = NULL;
int CpuProfilerSignalHandler::profiling_samplers_count_ = 0;
bool CpuProfilerSignalHandler::signal_handler_installed_ = false;
struct sigaction CpuProfilerSignalHandler::old_signal_handler_;
class Sampler::PlatformData : public Malloced {
public:
PlatformData()
: vm_tgid_(getpid()),
vm_tid_(GetThreadID()),
signal_handler_installed_(false) {}
void set_signal_handler_installed(bool value) {
signal_handler_installed_ = value;
}
void SendProfilingSignal() {
if (!signal_handler_installed_) return;
// Glibc doesn't provide a wrapper for tgkill(2).
#if defined(ANDROID)
syscall(__NR_tgkill, vm_tgid_, vm_tid_, SIGPROF);
#else
int result = syscall(SYS_tgkill, vm_tgid_, vm_tid_, SIGPROF);
USE(result);
ASSERT(result == 0);
#endif
}
private:
const int vm_tgid_;
const int vm_tid_;
bool signal_handler_installed_;
};
class SignalSender : public Thread {
public:
static const int kSignalSenderStackSize = 64 * KB;
explicit SignalSender(int interval)
: Thread(Thread::Options("SignalSender", kSignalSenderStackSize)),
interval_(interval) {}
static void SetUp() { if (!mutex_) mutex_ = OS::CreateMutex(); }
static void TearDown() { delete mutex_; }
static void AddActiveSampler(Sampler* sampler) {
ScopedLock lock(mutex_);
SamplerRegistry::AddActiveSampler(sampler);
......@@ -1186,7 +1239,6 @@ class SignalSender : public Thread {
RuntimeProfiler::StopRuntimeProfilerThreadBeforeShutdown(instance_);
delete instance_;
instance_ = NULL;
RestoreSignalHandler();
}
}
......@@ -1198,10 +1250,8 @@ class SignalSender : public Thread {
// When CPU profiling is enabled both JavaScript and C++ code is
// profiled. We must not suspend.
if (state == SamplerRegistry::HAS_CPU_PROFILING_SAMPLERS) {
if (!signal_handler_installed_) InstallSignalHandler();
SamplerRegistry::IterateActiveSamplers(&DoCpuProfile, this);
} else {
if (signal_handler_installed_) RestoreSignalHandler();
if (RuntimeProfiler::WaitForSomeIsolateToEnterJS()) continue;
}
Sleep(); // TODO(svenpanne) Figure out if OS:Sleep(interval_) is enough.
......@@ -1210,20 +1260,7 @@ class SignalSender : public Thread {
static void DoCpuProfile(Sampler* sampler, void* raw_sender) {
if (!sampler->IsProfiling()) return;
SignalSender* sender = reinterpret_cast<SignalSender*>(raw_sender);
sender->SendProfilingSignal(sampler->platform_data()->vm_tid());
}
void SendProfilingSignal(int tid) {
if (!signal_handler_installed_) return;
// Glibc doesn't provide a wrapper for tgkill(2).
#if defined(ANDROID)
syscall(__NR_tgkill, vm_tgid_, tid, SIGPROF);
#else
int result = syscall(SYS_tgkill, vm_tgid_, tid, SIGPROF);
USE(result);
ASSERT(result == 0);
#endif
sampler->DoSample();
}
void Sleep() {
......@@ -1247,13 +1284,11 @@ class SignalSender : public Thread {
#endif // ANDROID
}
const int vm_tgid_;
const int interval_;
// Protects the process wide state below.
static Mutex* mutex_;
static SignalSender* instance_;
static bool signal_handler_installed_;
static struct sigaction old_signal_handler_;
private:
......@@ -1264,7 +1299,6 @@ class SignalSender : public Thread {
Mutex* SignalSender::mutex_ = NULL;
SignalSender* SignalSender::instance_ = NULL;
struct sigaction SignalSender::old_signal_handler_;
bool SignalSender::signal_handler_installed_ = false;
void OS::SetUp() {
......@@ -1292,10 +1326,12 @@ void OS::SetUp() {
}
#endif
SignalSender::SetUp();
CpuProfilerSignalHandler::SetUp();
}
void OS::TearDown() {
CpuProfilerSignalHandler::TearDown();
SignalSender::TearDown();
delete limit_mutex;
}
......@@ -1305,6 +1341,7 @@ Sampler::Sampler(Isolate* isolate, int interval)
: isolate_(isolate),
interval_(interval),
profiling_(false),
has_processing_thread_(false),
active_(false),
samples_taken_(0) {
data_ = new PlatformData;
......@@ -1331,4 +1368,26 @@ void Sampler::Stop() {
}
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
return true;
}
void Sampler::DoSample() {
platform_data()->SendProfilingSignal();
}
void Sampler::StartProfiling() {
platform_data()->set_signal_handler_installed(
CpuProfilerSignalHandler::RegisterProfilingSampler());
}
void Sampler::StopProfiling() {
CpuProfilerSignalHandler::UnregisterProfilingSampler();
platform_data()->set_signal_handler_installed(false);
}
} } // namespace v8::internal
......@@ -922,4 +922,21 @@ void Sampler::Stop() {
}
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
return false;
}
void Sampler::DoSample() {
}
void Sampler::StartProfiling() {
}
void Sampler::StopProfiling() {
}
} } // namespace v8::internal
......@@ -525,4 +525,25 @@ void ProfileSampler::Stop() {
}
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
UNIMPLEMENTED();
return false;
}
void Sampler::DoSample() {
UNIMPLEMENTED();
}
void Sampler::StartProfiling() {
UNIMPLEMENTED();
}
void Sampler::StopProfiling() {
UNIMPLEMENTED();
}
} } // namespace v8::internal
......@@ -955,4 +955,21 @@ void Sampler::Stop() {
}
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
return false;
}
void Sampler::DoSample() {
}
void Sampler::StartProfiling() {
}
void Sampler::StopProfiling() {
}
} } // namespace v8::internal
......@@ -872,4 +872,22 @@ void Sampler::Stop() {
SetActive(false);
}
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
return false;
}
void Sampler::DoSample() {
}
void Sampler::StartProfiling() {
}
void Sampler::StopProfiling() {
}
} } // namespace v8::internal
......@@ -2153,4 +2153,21 @@ void Sampler::Stop() {
}
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
return false;
}
void Sampler::DoSample() {
}
void Sampler::StartProfiling() {
}
void Sampler::StopProfiling() {
}
} } // namespace v8::internal
......@@ -766,10 +766,17 @@ class Sampler {
void Start();
void Stop();
// Is the sampler used for profiling?
bool IsProfiling() const { return NoBarrier_Load(&profiling_) > 0; }
void IncreaseProfilingDepth() { NoBarrier_AtomicIncrement(&profiling_, 1); }
void DecreaseProfilingDepth() { NoBarrier_AtomicIncrement(&profiling_, -1); }
// Should the sampling thread use this Sampler for CPU profiling?
bool IsProfiling() const {
return NoBarrier_Load(&profiling_) > 0 &&
!NoBarrier_Load(&has_processing_thread_);
}
void IncreaseProfilingDepth() {
if (NoBarrier_AtomicIncrement(&profiling_, 1) == 1) StartProfiling();
}
void DecreaseProfilingDepth() {
if (!NoBarrier_AtomicIncrement(&profiling_, -1)) StopProfiling();
}
// Whether the sampler is running (that is, consumes resources).
bool IsActive() const { return NoBarrier_Load(&active_); }
......@@ -785,6 +792,14 @@ class Sampler {
PlatformData* platform_data() { return data_; }
// If true, the next sample must be initiated on the profiler event processor
// thread right after the latest sample is processed.
static bool CanSampleOnProfilerEventsProcessorThread();
void DoSample();
void SetHasProcessingThread(bool value) {
NoBarrier_Store(&has_processing_thread_, value);
}
protected:
virtual void DoSampleStack(TickSample* sample) = 0;
......@@ -792,9 +807,15 @@ class Sampler {
void SetActive(bool value) { NoBarrier_Store(&active_, value); }
void IncSamplesTaken() { if (++samples_taken_ < 0) samples_taken_ = 0; }
// Perform platform-specific initialization before DoSample() may be invoked.
void StartProfiling();
// Perform platform-specific cleanup after profiling.
void StopProfiling();
Isolate* isolate_;
const int interval_;
Atomic32 profiling_;
Atomic32 has_processing_thread_;
Atomic32 active_;
PlatformData* data_; // Platform specific data.
int samples_taken_; // Counts stack samples taken.
......
......@@ -20,7 +20,7 @@ using i::TokenEnumerator;
TEST(StartStop) {
CpuProfilesCollection profiles;
ProfileGenerator generator(&profiles);
ProfilerEventsProcessor processor(&generator);
ProfilerEventsProcessor processor(&generator, NULL, 100);
processor.Start();
processor.Stop();
processor.Join();
......@@ -84,7 +84,7 @@ TEST(CodeEvents) {
CpuProfilesCollection profiles;
profiles.StartProfiling("", 1);
ProfileGenerator generator(&profiles);
ProfilerEventsProcessor processor(&generator);
ProfilerEventsProcessor processor(&generator, NULL, 100);
processor.Start();
// Enqueue code creation events.
......@@ -145,7 +145,7 @@ TEST(TickEvents) {
CpuProfilesCollection profiles;
profiles.StartProfiling("", 1);
ProfileGenerator generator(&profiles);
ProfilerEventsProcessor processor(&generator);
ProfilerEventsProcessor processor(&generator, NULL, 100);
processor.Start();
processor.CodeCreateEvent(i::Logger::BUILTIN_TAG,
......@@ -235,7 +235,7 @@ TEST(Issue1398) {
CpuProfilesCollection profiles;
profiles.StartProfiling("", 1);
ProfileGenerator generator(&profiles);
ProfilerEventsProcessor processor(&generator);
ProfilerEventsProcessor processor(&generator, NULL, 100);
processor.Start();
processor.CodeCreateEvent(i::Logger::BUILTIN_TAG,
......