Commit 50537bad authored by mattloring's avatar mattloring Committed by Commit bot

Unsampling for the sampling heap profiler

Implements Poisson unsampling. A Poisson process is used to determine
which samples to collect based on a sample rate. Unsampling will
approximate the true number of allocations at each site, taking into
account that smaller allocations are less likely to be sampled.

This work was originally being done in the agent that
consumes profiles but it is more efficient to do it here
and individual consumers of the API should not have to
worry about the mathematical details of the sampling
process.

R=ofrobots@google.com
BUG=

Review URL: https://codereview.chromium.org/1706343002

Cr-Commit-Position: refs/heads/master@{#34234}
parent a5296768
......@@ -33,6 +33,20 @@ intptr_t SamplingAllocationObserver::GetNextSampleInterval(uint64_t rate) {
: (next > INT_MAX ? INT_MAX : static_cast<intptr_t>(next));
}
// The profiler records samples according to a Poisson process, so only a
// subset of all allocations is ever observed. To estimate the shape of the
// full allocation space we invert the sampling probability: when sampling at
// rate R, an allocation of size S is sampled with probability 1-exp(-S/R).
// Scaling the observed *count* by the reciprocal of that probability
// approximates how many allocations of size *size* actually occurred.
v8::AllocationProfile::Allocation SamplingHeapProfiler::ScaleSample(
size_t size, unsigned int count) {
const double sample_probability =
1.0 - std::exp(-static_cast<double>(size) / rate_);
const double scale = 1.0 / sample_probability;
// Adding 0.5 before the truncating cast rounds to the nearest integer.
const double estimated_count = count * scale + 0.5;
return {size, static_cast<unsigned int>(estimated_count)};
}
SamplingHeapProfiler::SamplingHeapProfiler(Heap* heap, StringsStorage* names,
uint64_t rate, int stack_depth)
: isolate_(heap->isolate()),
......@@ -46,7 +60,9 @@ SamplingHeapProfiler::SamplingHeapProfiler(Heap* heap, StringsStorage* names,
names_(names),
profile_root_("(root)", v8::UnboundScript::kNoScriptId, 0),
samples_(),
stack_depth_(stack_depth) {
stack_depth_(stack_depth),
rate_(rate) {
CHECK_GT(rate_, 0);
heap->new_space()->AddAllocationObserver(new_space_observer_.get());
AllSpaces spaces(heap);
for (Space* space = spaces.next(); space != NULL; space = spaces.next()) {
......@@ -197,7 +213,7 @@ v8::AllocationProfile::Node* SamplingHeapProfiler::TranslateAllocationNode(
line = 1 + Script::GetLineNumber(script_handle, node->script_position_);
column = 1 + Script::GetColumnNumber(script_handle, node->script_position_);
for (auto alloc : node->allocations_) {
allocations.push_back({alloc.first, alloc.second});
allocations.push_back(ScaleSample(alloc.first, alloc.second));
}
}
......
......@@ -111,6 +111,8 @@ class SamplingHeapProfiler {
v8::AllocationProfile::Node* TranslateAllocationNode(
AllocationProfile* profile, SamplingHeapProfiler::AllocationNode* node,
const std::map<int, Script*>& scripts);
v8::AllocationProfile::Allocation ScaleSample(size_t size,
unsigned int count);
AllocationNode* AddStack();
AllocationNode* FindOrAddChildNode(AllocationNode* parent, const char* name,
int script_id, int start_position);
......@@ -123,6 +125,7 @@ class SamplingHeapProfiler {
AllocationNode profile_root_;
std::set<Sample*> samples_;
const int stack_depth_;
const uint64_t rate_;
friend class SamplingAllocationObserver;
};
......
......@@ -2901,9 +2901,9 @@ TEST(SamplingHeapProfiler) {
CHECK(profile == nullptr);
}
int count_512kb = 0;
int count_1024 = 0;
{
heap_profiler->StartSamplingHeapProfiler(512 * 1024);
heap_profiler->StartSamplingHeapProfiler(1024);
CompileRun(script_source);
v8::base::SmartPointer<v8::AllocationProfile> profile(
......@@ -2917,7 +2917,7 @@ TEST(SamplingHeapProfiler) {
// Count the number of allocations we sampled from bar.
for (auto allocation : node_bar->allocations) {
count_512kb += allocation.count;
count_1024 += allocation.count;
}
heap_profiler->StopSamplingHeapProfiler();
......@@ -2929,9 +2929,9 @@ TEST(SamplingHeapProfiler) {
CHECK(profile == nullptr);
}
// Sampling at a higher rate should give us more sampled objects.
// Sampling at a higher rate should give us similar numbers of objects.
{
heap_profiler->StartSamplingHeapProfiler(32 * 1024);
heap_profiler->StartSamplingHeapProfiler(128);
CompileRun(script_source);
v8::base::SmartPointer<v8::AllocationProfile> profile(
......@@ -2944,15 +2944,21 @@ TEST(SamplingHeapProfiler) {
CHECK(node_bar);
// Count the number of allocations we sampled from bar.
int count_32kb = 0;
int count_128 = 0;
for (auto allocation : node_bar->allocations) {
count_32kb += allocation.count;
count_128 += allocation.count;
}
// We should have roughly 16x as many sampled allocations. However,
// alignment and boundaries might tweak the numbers slightly. We use a
// slightly weaker test to account for this.
CHECK_GT(count_32kb, 8 * count_512kb);
// We should have similar unsampled counts of allocations. Though
// we will sample different numbers of objects at different rates,
// the unsampling process should produce similar final estimates
// of the true number of allocations. However, the process to
// determine these unsampled counts is probabilistic so we need to
// account for error.
double max_count = std::max(count_128, count_1024);
double min_count = std::min(count_128, count_1024);
double percent_difference = (max_count - min_count) / min_count;
CHECK_LT(percent_difference, 0.15);
heap_profiler->StopSamplingHeapProfiler();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment