Commit 18c73676 authored by Seth Brenith, committed by Commit Bot

[diagnostics] Support --turbo-profiling for builtins

Currently, if d8 is run with the --turbo-profiling flag, it prints info
about every TurboFan-compiled function. This info includes the number of
times that each basic block in the function was run. It also includes
text representations of the function's schedule and code, so that the
person reading the output can associate counters with blocks of code.

The data about each function is currently stored in a
BasicBlockProfiler::Data instance, which is attached to a list owned by
the singleton BasicBlockProfiler. Each Data contains an
std::vector<uint32_t> which represents how many times each block in the
function has executed. The generated code for each block uses a raw
pointer into the storage of that vector to implement incrementing the
counter.
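
As background, here is a minimal standalone sketch of that raw-pointer
scheme (hypothetical names; in V8 the increment is emitted as graph
nodes rather than written in C++):

```cpp
#include <cstdint>
#include <vector>

int main() {
  // One counter per basic block, owned by the profiler (off the JS heap).
  std::vector<uint32_t> counts(4, 0);
  // The instrumented code bakes in the raw address of a block's counter...
  uint32_t* counter_for_block_2 = &counts[2];
  // ...and the block's prologue performs a load/increment/store on it.
  *counter_for_block_2 += 1;
  return counts[2] == 1 ? 0 : 1;
}
```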

With this change, if you compile with v8_enable_builtins_profiling and
then run with --turbo-profiling, d8 will print that same info about
builtins too.

In order to generate code that can survive being serialized to a
snapshot and reloaded, this change uses counters in the JS heap instead
of a std::vector outside the JS heap. The steps for instrumentation are
as follows:

1. Between scheduling and instruction selection, add code to increment
   the counter for each block. The counters array doesn't yet exist at
   this point, and allocation is disallowed, so at this point the code
   refers to a special marker value.
2. During finalization of the code, allocate an
   OnHeapBasicBlockProfilerData object on the JS heap containing data
   equivalent to what is stored in BasicBlockProfilerData. This includes
   a ByteArray that is big enough to store the counters for each block.
3. Patch the reference in the BuiltinsConstantsTableBuilder so that
   instead of referring to the marker object, it now refers to this
   ByteArray (a toy model of the patching is sketched below, after this
   list). Also add the OnHeapBasicBlockProfilerData object to a list
   that is attached to the heap roots so it can be easily accessed for
   printing.
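
A toy model of the patching in step 3, using a std::unordered_map and
strings as stand-ins for V8's constants map and heap objects (none of
these names are V8 API):

```cpp
#include <cstdint>
#include <string>
#include <unordered_map>

int main() {
  // Maps each constant object to its slot (key) in the constants table.
  std::unordered_map<std::string, uint32_t> constants_map;
  constants_map["basic_block_counters_marker"] = 7;  // placeholder entry

  // PatchBasicBlockCountersReference, in miniature: remove the marker's
  // entry and rebind its slot to the freshly allocated counters array.
  auto it = constants_map.find("basic_block_counters_marker");
  if (it != constants_map.end()) {
    uint32_t key = it->second;
    constants_map.erase(it);
    constants_map["counters_byte_array"] = key;  // same slot, real object
  }
  return constants_map["counters_byte_array"] == 7 ? 0 : 1;
}
```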

Because these steps include modifying the BuiltinsConstantsTableBuilder,
this procedure is only applicable to builtins. Runtime-generated code
still uses raw pointers into std::vector instances. In order to keep
divergence between these code paths to a minimum, most work is done
referring to instances of BasicBlockProfilerData (the C++ class), and
functions are provided to copy back and forth between that type and
OnHeapBasicBlockProfilerData (the JS heap object).
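
A rough sketch of that mirroring, with plain structs standing in for the
two types (the real heap object stores ByteArrays of 4-byte slots and
Strings, and the real conversions are BasicBlockProfilerData::CopyToJSHeap
and a constructor taking a Handle<OnHeapBasicBlockProfilerData>):

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Stand-in for the Torque-generated OnHeapBasicBlockProfilerData.
struct OnHeapData {
  std::vector<int32_t> block_rpo_numbers;
  std::vector<uint32_t> counts;
  std::string name, schedule, code;
};

// Stand-in for BasicBlockProfilerData, the C++-side representation.
struct CppData {
  std::vector<int32_t> block_rpo_numbers;
  std::vector<uint32_t> counts;
  std::string name, schedule, code;

  // Analogous to BasicBlockProfilerData::CopyToJSHeap().
  OnHeapData CopyToHeap() const {
    return {block_rpo_numbers, counts, name, schedule, code};
  }
  // Analogous to constructing BasicBlockProfilerData from a heap object.
  static CppData FromHeap(const OnHeapData& h) {
    return {h.block_rpo_numbers, h.counts, h.name, h.schedule, h.code};
  }
};

int main() {
  CppData d{{0, 1, 2}, {9, 4, 1}, "fn", "schedule", "code"};
  CppData round_tripped = CppData::FromHeap(d.CopyToHeap());
  return round_tripped.counts[0] == 9 ? 0 : 1;  // lossless round trip
}
```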

This change is intended only to make --turbo-profiling work consistently
on more kinds of functions, but with some further work, this data could
form the basis for:
- code coverage info for fuzzers, and/or
- hot-path info for profile-guided optimization.

Bug: v8:10470, v8:9119
Change-Id: Ib556a5bc3abe67cdaa2e3ee62702a2a08b11cb61
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2159738
Commit-Queue: Seth Brenith <seth.brenith@microsoft.com>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67944}
parent 16a23e9a
......@@ -130,6 +130,11 @@ declare_args() {
# Sets -dV8_ARRAY_BUFFER_EXTENSION
v8_enable_array_buffer_extension = true
# Runs mksnapshot with --turbo-profiling. After building in this
# configuration, any subsequent run of d8 with --turbo-profiling will output
# information about both runtime-generated code and builtins.
v8_enable_builtins_profiling = false
# Enables various testing features.
v8_enable_test_features = ""
......@@ -1410,6 +1415,10 @@ template("run_mksnapshot") {
rebase_path("$target_gen_dir/embedded${suffix}.S", root_build_dir),
]
if (v8_enable_builtins_profiling) {
args += [ "--turbo-profiling" ]
}
# This is needed to distinguish between generating code for the simulator
# and cross-compiling. The latter may need to run code on the host with the
# simulator but cannot use simulator-specific instructions.
......
......@@ -57,24 +57,30 @@ uint32_t BuiltinsConstantsTableBuilder::AddObject(Handle<Object> object) {
}
}
void BuiltinsConstantsTableBuilder::PatchSelfReference(
Handle<Object> self_reference, Handle<Code> code_object) {
#ifdef DEBUG
namespace {
void CheckPreconditionsForPatching(Isolate* isolate,
Handle<Object> replacement_object) {
// Roots must not be inserted into the constants table as they are already
// accessibly from the root list.
// accessible from the root list.
RootIndex root_list_index;
DCHECK(!isolate_->roots_table().IsRootHandle(code_object, &root_list_index));
DCHECK(!isolate->roots_table().IsRootHandle(replacement_object,
&root_list_index));
USE(root_list_index);
// Not yet finalized.
DCHECK_EQ(ReadOnlyRoots(isolate_).empty_fixed_array(),
isolate_->heap()->builtins_constants_table());
DCHECK_EQ(ReadOnlyRoots(isolate).empty_fixed_array(),
isolate->heap()->builtins_constants_table());
DCHECK(isolate_->IsGeneratingEmbeddedBuiltins());
DCHECK(isolate->IsGeneratingEmbeddedBuiltins());
}
} // namespace
void BuiltinsConstantsTableBuilder::PatchSelfReference(
Handle<Object> self_reference, Handle<Code> code_object) {
CheckPreconditionsForPatching(isolate_, code_object);
DCHECK(self_reference->IsOddball());
DCHECK(Oddball::cast(*self_reference).kind() ==
Oddball::kSelfReferenceMarker);
#endif
uint32_t key;
if (map_.Delete(self_reference, &key)) {
......@@ -83,6 +89,17 @@ void BuiltinsConstantsTableBuilder::PatchSelfReference(
}
}
void BuiltinsConstantsTableBuilder::PatchBasicBlockCountersReference(
Handle<ByteArray> counters) {
CheckPreconditionsForPatching(isolate_, counters);
uint32_t key;
if (map_.Delete(ReadOnlyRoots(isolate_).basic_block_counters_marker(),
&key)) {
map_.Set(counters, key);
}
}
void BuiltinsConstantsTableBuilder::Finalize() {
HandleScope handle_scope(isolate_);
......@@ -117,6 +134,8 @@ void BuiltinsConstantsTableBuilder::Finalize() {
DCHECK(table->get(i).IsHeapObject());
DCHECK_NE(ReadOnlyRoots(isolate_).undefined_value(), table->get(i));
DCHECK_NE(ReadOnlyRoots(isolate_).self_reference_marker(), table->get(i));
DCHECK_NE(ReadOnlyRoots(isolate_).basic_block_counters_marker(),
table->get(i));
}
#endif
......
......@@ -34,6 +34,11 @@ class BuiltinsConstantsTableBuilder final {
void PatchSelfReference(Handle<Object> self_reference,
Handle<Code> code_object);
// References to the array that stores basic block usage counters start out as
// references to a unique oddball. Once the actual array has been allocated,
// such entries in the constants map must be patched up.
void PatchBasicBlockCountersReference(Handle<ByteArray> counters);
// Should be called after all affected code (e.g. builtins and bytecode
// handlers) has been generated.
void Finalize();
......
......@@ -11,6 +11,7 @@
#include "src/codegen/source-position-table.h"
#include "src/codegen/tick-counter.h"
#include "src/common/globals.h"
#include "src/diagnostics/basic-block-profiler.h"
#include "src/execution/frames.h"
#include "src/handles/handles.h"
#include "src/objects/objects.h"
......@@ -290,6 +291,11 @@ class V8_EXPORT_PRIVATE OptimizedCompilationInfo final {
TickCounter& tick_counter() { return tick_counter_; }
BasicBlockProfilerData* profiler_data() const { return profiler_data_; }
void set_profiler_data(BasicBlockProfilerData* profiler_data) {
profiler_data_ = profiler_data;
}
private:
OptimizedCompilationInfo(Code::Kind code_kind, Zone* zone);
void ConfigureFlags();
......@@ -318,6 +324,9 @@ class V8_EXPORT_PRIVATE OptimizedCompilationInfo final {
// The compiled code.
Handle<Code> code_;
// Basic block profiling support.
BasicBlockProfilerData* profiler_data_ = nullptr;
// The WebAssembly compilation result, not published in the NativeModule yet.
std::unique_ptr<wasm::WasmCompilationResult> wasm_compilation_result_;
......
......@@ -503,6 +503,7 @@ MaybeHandle<Code> CodeGenerator::FinalizeCode() {
.set_deoptimization_data(deopt_data)
.set_is_turbofanned()
.set_stack_slots(frame()->GetTotalFrameSlotCount())
.set_profiler_data(info()->profiler_data())
.TryBuild();
Handle<Code> code;
......
......@@ -37,16 +37,16 @@ static NodeVector::iterator FindInsertionPoint(BasicBlock* block) {
return i;
}
// TODO(dcarney): need to mark code as non-serializable.
static const Operator* PointerConstant(CommonOperatorBuilder* common,
intptr_t ptr) {
const void* ptr) {
intptr_t ptr_as_int = reinterpret_cast<intptr_t>(ptr);
return kSystemPointerSize == 8
? common->Int64Constant(ptr)
: common->Int32Constant(static_cast<int32_t>(ptr));
? common->Int64Constant(ptr_as_int)
: common->Int32Constant(static_cast<int32_t>(ptr_as_int));
}
BasicBlockProfiler::Data* BasicBlockInstrumentor::Instrument(
BasicBlockProfilerData* BasicBlockInstrumentor::Instrument(
OptimizedCompilationInfo* info, Graph* graph, Schedule* schedule,
Isolate* isolate) {
// Basic block profiling disables concurrent compilation, so handle deref is
......@@ -54,41 +54,68 @@ BasicBlockProfiler::Data* BasicBlockInstrumentor::Instrument(
AllowHandleDereference allow_handle_dereference;
// Skip the exit block in profiles, since the register allocator can't handle
// it and entry into it means falling off the end of the function anyway.
size_t n_blocks = static_cast<size_t>(schedule->RpoBlockCount()) - 1;
BasicBlockProfiler::Data* data = BasicBlockProfiler::Get()->NewData(n_blocks);
size_t n_blocks = schedule->RpoBlockCount() - 1;
BasicBlockProfilerData* data = BasicBlockProfiler::Get()->NewData(n_blocks);
// Set the function name.
data->SetFunctionName(info->GetDebugName());
// Capture the schedule string before instrumentation.
{
std::ostringstream os;
os << *schedule;
data->SetSchedule(&os);
data->SetSchedule(os);
}
// Check whether we should write counts to a JS heap object or to the
// BasicBlockProfilerData directly. The JS heap object is only used for
// builtins.
bool on_heap_counters = isolate && isolate->IsGeneratingEmbeddedBuiltins();
// Add the increment instructions to the start of every block.
CommonOperatorBuilder common(graph->zone());
Node* zero = graph->NewNode(common.Int32Constant(0));
Node* one = graph->NewNode(common.Int32Constant(1));
MachineOperatorBuilder machine(graph->zone());
Node* counters_array = nullptr;
if (on_heap_counters) {
// Allocation is disallowed here, so rather than referring to an actual
// counters array, create a reference to a special marker object. This
// object will get fixed up later in the constants table (see
// PatchBasicBlockCountersReference). An important and subtle point: we
// cannot use the root handle basic_block_counters_marker_handle() and must
// create a new separate handle. Otherwise
// TurboAssemblerBase::IndirectLoadConstant would helpfully emit a
// root-relative load rather than putting this value in the constants table
// where we expect it to be for patching.
counters_array = graph->NewNode(common.HeapConstant(Handle<HeapObject>::New(
ReadOnlyRoots(isolate).basic_block_counters_marker(), isolate)));
} else {
counters_array = graph->NewNode(PointerConstant(&common, data->counts()));
}
Node* one = graph->NewNode(common.Int32Constant(1));
BasicBlockVector* blocks = schedule->rpo_order();
size_t block_number = 0;
for (BasicBlockVector::iterator it = blocks->begin(); block_number < n_blocks;
++it, ++block_number) {
BasicBlock* block = (*it);
data->SetBlockRpoNumber(block_number, block->rpo_number());
// TODO(dcarney): wire effect and control deps for load and store.
// It is unnecessary to wire effect and control deps for load and store
// since this happens after scheduling.
// Construct increment operation.
Node* base = graph->NewNode(
PointerConstant(&common, data->GetCounterAddress(block_number)));
Node* load = graph->NewNode(machine.Load(MachineType::Uint32()), base, zero,
graph->start(), graph->start());
int offset_to_counter_value = static_cast<int>(block_number) * kInt32Size;
if (on_heap_counters) {
offset_to_counter_value += ByteArray::kHeaderSize - kHeapObjectTag;
}
Node* offset_to_counter =
graph->NewNode(common.Int32Constant(offset_to_counter_value));
Node* load =
graph->NewNode(machine.Load(MachineType::Uint32()), counters_array,
offset_to_counter, graph->start(), graph->start());
Node* inc = graph->NewNode(machine.Int32Add(), load, one);
Node* store =
graph->NewNode(machine.Store(StoreRepresentation(
MachineRepresentation::kWord32, kNoWriteBarrier)),
base, zero, inc, graph->start(), graph->start());
Node* store = graph->NewNode(
machine.Store(StoreRepresentation(MachineRepresentation::kWord32,
kNoWriteBarrier)),
counters_array, offset_to_counter, inc, graph->start(), graph->start());
// Insert the new nodes.
static const int kArraySize = 6;
Node* to_insert[kArraySize] = {zero, one, base, load, inc, store};
Node* to_insert[kArraySize] = {counters_array, one, offset_to_counter,
load, inc, store};
// The first two Nodes are constant across all blocks.
int insertion_start = block_number == 0 ? 0 : 2;
NodeVector::iterator insertion_point = FindInsertionPoint(block);
block->InsertNodes(insertion_point, &to_insert[insertion_start],
......
......@@ -20,9 +20,9 @@ class Schedule;
class BasicBlockInstrumentor : public AllStatic {
public:
static BasicBlockProfiler::Data* Instrument(OptimizedCompilationInfo* info,
Graph* graph, Schedule* schedule,
Isolate* isolate);
static BasicBlockProfilerData* Instrument(OptimizedCompilationInfo* info,
Graph* graph, Schedule* schedule,
Isolate* isolate);
};
} // namespace compiler
......
......@@ -349,11 +349,6 @@ class PipelineData {
return register_allocation_data_;
}
BasicBlockProfiler::Data* profiler_data() const { return profiler_data_; }
void set_profiler_data(BasicBlockProfiler::Data* profiler_data) {
profiler_data_ = profiler_data;
}
std::string const& source_position_output() const {
return source_position_output_;
}
......@@ -599,9 +594,6 @@ class PipelineData {
Zone* register_allocation_zone_;
RegisterAllocationData* register_allocation_data_ = nullptr;
// Basic block profiling support.
BasicBlockProfiler::Data* profiler_data_ = nullptr;
// Source position output for --trace-turbo.
std::string source_position_output_;
......@@ -2622,8 +2614,9 @@ MaybeHandle<Code> Pipeline::GenerateCodeForCodeStub(
ZoneStats zone_stats(isolate->allocator());
NodeOriginTable node_origins(graph);
JumpOptimizationInfo jump_opt;
bool should_optimize_jumps =
isolate->serializer_enabled() && FLAG_turbo_rewrite_far_jumps;
bool should_optimize_jumps = isolate->serializer_enabled() &&
FLAG_turbo_rewrite_far_jumps &&
!FLAG_turbo_profiling;
PipelineData data(&zone_stats, &info, isolate, isolate->allocator(), graph,
jsgraph, nullptr, source_positions, &node_origins,
should_optimize_jumps ? &jump_opt : nullptr, options);
......@@ -3055,7 +3048,7 @@ bool PipelineImpl::SelectInstructions(Linkage* linkage) {
DCHECK_NOT_NULL(data->schedule());
if (FLAG_turbo_profiling) {
data->set_profiler_data(BasicBlockInstrumentor::Instrument(
data->info()->set_profiler_data(BasicBlockInstrumentor::Instrument(
info(), data->graph(), data->schedule(), data->isolate()));
}
......@@ -3273,14 +3266,6 @@ MaybeHandle<Code> PipelineImpl::FinalizeCode(bool retire_broker) {
return maybe_code;
}
if (data->profiler_data()) {
#ifdef ENABLE_DISASSEMBLER
std::ostringstream os;
code->Disassemble(nullptr, os, isolate());
data->profiler_data()->SetCode(&os);
#endif // ENABLE_DISASSEMBLER
}
info()->SetCode(code);
PrintCode(isolate(), code, info());
......
......@@ -2169,8 +2169,9 @@ void Shell::WriteLcovData(v8::Isolate* isolate, const char* file) {
void Shell::OnExit(v8::Isolate* isolate) {
// Dump basic block profiling data.
if (i::FLAG_turbo_profiling) {
i::BasicBlockProfiler* profiler = i::BasicBlockProfiler::Get();
i::StdoutStream{} << *profiler;
i::StdoutStream out;
i::BasicBlockProfiler::Get()->Print(out,
reinterpret_cast<i::Isolate*>(isolate));
}
isolate->Dispose();
......
......@@ -9,84 +9,132 @@
#include <sstream>
#include "src/base/lazy-instance.h"
#include "src/heap/heap-inl.h"
#include "torque-generated/exported-class-definitions-tq-inl.h"
namespace v8 {
namespace internal {
DEFINE_LAZY_LEAKY_OBJECT_GETTER(BasicBlockProfiler, BasicBlockProfiler::Get)
BasicBlockProfiler::Data::Data(size_t n_blocks)
: n_blocks_(n_blocks),
block_rpo_numbers_(n_blocks_),
counts_(n_blocks_, 0) {}
BasicBlockProfilerData::BasicBlockProfilerData(size_t n_blocks)
: block_rpo_numbers_(n_blocks), counts_(n_blocks, 0) {}
static void InsertIntoString(std::ostringstream* os, std::string* string) {
string->insert(0, os->str());
void BasicBlockProfilerData::SetCode(const std::ostringstream& os) {
code_ = os.str();
}
static void InsertIntoString(const char* data, std::string* string) {
string->insert(0, data);
void BasicBlockProfilerData::SetFunctionName(std::unique_ptr<char[]> name) {
function_name_ = name.get();
}
void BasicBlockProfiler::Data::SetCode(std::ostringstream* os) {
InsertIntoString(os, &code_);
void BasicBlockProfilerData::SetSchedule(const std::ostringstream& os) {
schedule_ = os.str();
}
void BasicBlockProfiler::Data::SetFunctionName(std::unique_ptr<char[]> name) {
InsertIntoString(name.get(), &function_name_);
void BasicBlockProfilerData::SetBlockRpoNumber(size_t offset,
int32_t block_rpo) {
DCHECK(offset < n_blocks());
block_rpo_numbers_[offset] = block_rpo;
}
void BasicBlockProfiler::Data::SetSchedule(std::ostringstream* os) {
InsertIntoString(os, &schedule_);
void BasicBlockProfilerData::ResetCounts() {
for (size_t i = 0; i < n_blocks(); ++i) {
counts_[i] = 0;
}
}
void BasicBlockProfiler::Data::SetBlockRpoNumber(size_t offset,
int32_t block_rpo) {
DCHECK(offset < n_blocks_);
block_rpo_numbers_[offset] = block_rpo;
BasicBlockProfilerData* BasicBlockProfiler::NewData(size_t n_blocks) {
base::MutexGuard lock(&data_list_mutex_);
auto data = std::make_unique<BasicBlockProfilerData>(n_blocks);
BasicBlockProfilerData* data_ptr = data.get();
data_list_.push_back(std::move(data));
return data_ptr;
}
intptr_t BasicBlockProfiler::Data::GetCounterAddress(size_t offset) {
DCHECK(offset < n_blocks_);
return reinterpret_cast<intptr_t>(&(counts_[offset]));
namespace {
Handle<String> CopyStringToJSHeap(const std::string& source, Isolate* isolate) {
return isolate->factory()->NewStringFromAsciiChecked(source.c_str(),
AllocationType::kOld);
}
void BasicBlockProfiler::Data::ResetCounts() {
for (size_t i = 0; i < n_blocks_; ++i) {
counts_[i] = 0;
// Size of entries in both block_rpo_numbers and counts.
constexpr int kBasicBlockSlotSize = kInt32Size;
} // namespace
BasicBlockProfilerData::BasicBlockProfilerData(
Handle<OnHeapBasicBlockProfilerData> js_heap_data, Isolate* isolate) {
function_name_ = js_heap_data->name().ToCString().get();
schedule_ = js_heap_data->schedule().ToCString().get();
code_ = js_heap_data->code().ToCString().get();
Handle<ByteArray> counts(js_heap_data->counts(), isolate);
for (int i = 0; i < counts->length() / kBasicBlockSlotSize; ++i) {
counts_.push_back(counts->get_uint32(i));
}
Handle<ByteArray> rpo_numbers(js_heap_data->block_rpo_numbers(), isolate);
for (int i = 0; i < rpo_numbers->length() / kBasicBlockSlotSize; ++i) {
block_rpo_numbers_.push_back(rpo_numbers->get_int(i));
}
CHECK_EQ(block_rpo_numbers_.size(), counts_.size());
}
BasicBlockProfiler::Data* BasicBlockProfiler::NewData(size_t n_blocks) {
base::MutexGuard lock(&data_list_mutex_);
Data* data = new Data(n_blocks);
data_list_.push_back(data);
return data;
}
BasicBlockProfiler::~BasicBlockProfiler() {
for (DataList::iterator i = data_list_.begin(); i != data_list_.end(); ++i) {
delete (*i);
Handle<OnHeapBasicBlockProfilerData> BasicBlockProfilerData::CopyToJSHeap(
Isolate* isolate) {
int array_size_in_bytes = static_cast<int>(n_blocks() * kBasicBlockSlotSize);
CHECK(array_size_in_bytes >= 0 &&
static_cast<size_t>(array_size_in_bytes) / kBasicBlockSlotSize ==
n_blocks()); // Overflow
Handle<ByteArray> block_rpo_numbers = isolate->factory()->NewByteArray(
array_size_in_bytes, AllocationType::kOld);
for (int i = 0; i < static_cast<int>(n_blocks()); ++i) {
block_rpo_numbers->set_int(i, block_rpo_numbers_[i]);
}
Handle<ByteArray> counts = isolate->factory()->NewByteArray(
array_size_in_bytes, AllocationType::kOld);
for (int i = 0; i < static_cast<int>(n_blocks()); ++i) {
counts->set_uint32(i, counts_[i]);
}
Handle<String> name = CopyStringToJSHeap(function_name_, isolate);
Handle<String> schedule = CopyStringToJSHeap(schedule_, isolate);
Handle<String> code = CopyStringToJSHeap(code_, isolate);
return isolate->factory()->NewOnHeapBasicBlockProfilerData(
block_rpo_numbers, counts, name, schedule, code, AllocationType::kOld);
}
void BasicBlockProfiler::ResetCounts() {
for (DataList::iterator i = data_list_.begin(); i != data_list_.end(); ++i) {
(*i)->ResetCounts();
void BasicBlockProfiler::ResetCounts(Isolate* isolate) {
for (const auto& data : data_list_) {
data->ResetCounts();
}
Handle<ArrayList> list(isolate->heap()->basic_block_profiling_data(),
isolate);
for (int i = 0; i < list->Length(); ++i) {
Handle<ByteArray> counts(
OnHeapBasicBlockProfilerData::cast(list->Get(i)).counts(), isolate);
for (int j = 0; j < counts->length() / kBasicBlockSlotSize; ++j) {
counts->set_uint32(j, 0);
}
}
}
std::ostream& operator<<(std::ostream& os, const BasicBlockProfiler& p) {
void BasicBlockProfiler::Print(std::ostream& os, Isolate* isolate) {
os << "---- Start Profiling Data ----" << std::endl;
using iterator = BasicBlockProfiler::DataList::const_iterator;
for (iterator i = p.data_list_.begin(); i != p.data_list_.end(); ++i) {
os << **i;
for (const auto& data : data_list_) {
os << *data;
}
HandleScope scope(isolate);
Handle<ArrayList> list(isolate->heap()->basic_block_profiling_data(),
isolate);
for (int i = 0; i < list->Length(); ++i) {
BasicBlockProfilerData data(
handle(OnHeapBasicBlockProfilerData::cast(list->Get(i)), isolate),
isolate);
os << data;
}
os << "---- End Profiling Data ----" << std::endl;
return os;
}
std::ostream& operator<<(std::ostream& os, const BasicBlockProfiler::Data& d) {
std::ostream& operator<<(std::ostream& os, const BasicBlockProfilerData& d) {
int block_count_sum = std::accumulate(d.counts_.begin(), d.counts_.end(), 0);
if (block_count_sum == 0) return os;
const char* name = "unknown function";
......@@ -100,8 +148,8 @@ std::ostream& operator<<(std::ostream& os, const BasicBlockProfiler::Data& d) {
}
os << "block counts for " << name << ":" << std::endl;
std::vector<std::pair<int32_t, uint32_t>> pairs;
pairs.reserve(d.n_blocks_);
for (size_t i = 0; i < d.n_blocks_; ++i) {
pairs.reserve(d.n_blocks());
for (size_t i = 0; i < d.n_blocks(); ++i) {
pairs.push_back(std::make_pair(d.block_rpo_numbers_[i], d.counts_[i]));
}
std::sort(pairs.begin(), pairs.end(),
......
......@@ -14,66 +14,70 @@
#include "src/base/macros.h"
#include "src/base/platform/mutex.h"
#include "src/common/globals.h"
#include "torque-generated/exported-class-definitions-tq.h"
namespace v8 {
namespace internal {
class BasicBlockProfilerData {
public:
explicit BasicBlockProfilerData(size_t n_blocks);
V8_EXPORT_PRIVATE BasicBlockProfilerData(
Handle<OnHeapBasicBlockProfilerData> js_heap_data, Isolate* isolate);
size_t n_blocks() const {
DCHECK_EQ(block_rpo_numbers_.size(), counts_.size());
return block_rpo_numbers_.size();
}
const uint32_t* counts() const { return &counts_[0]; }
void SetCode(const std::ostringstream& os);
void SetFunctionName(std::unique_ptr<char[]> name);
void SetSchedule(const std::ostringstream& os);
void SetBlockRpoNumber(size_t offset, int32_t block_rpo);
// Copy the data from this object into an equivalent object stored on the JS
// heap, so that it can survive snapshotting and relocation. This must
// happen on the main thread during finalization of the compilation.
Handle<OnHeapBasicBlockProfilerData> CopyToJSHeap(Isolate* isolate);
private:
friend class BasicBlockProfiler;
friend std::ostream& operator<<(std::ostream& os,
const BasicBlockProfilerData& s);
V8_EXPORT_PRIVATE void ResetCounts();
std::vector<int32_t> block_rpo_numbers_;
std::vector<uint32_t> counts_;
std::string function_name_;
std::string schedule_;
std::string code_;
DISALLOW_COPY_AND_ASSIGN(BasicBlockProfilerData);
};
class BasicBlockProfiler {
public:
class Data {
public:
size_t n_blocks() const { return n_blocks_; }
const uint32_t* counts() const { return &counts_[0]; }
void SetCode(std::ostringstream* os);
void SetFunctionName(std::unique_ptr<char[]> name);
void SetSchedule(std::ostringstream* os);
void SetBlockRpoNumber(size_t offset, int32_t block_rpo);
intptr_t GetCounterAddress(size_t offset);
private:
friend class BasicBlockProfiler;
friend std::ostream& operator<<(std::ostream& os,
const BasicBlockProfiler::Data& s);
explicit Data(size_t n_blocks);
~Data() = default;
V8_EXPORT_PRIVATE void ResetCounts();
const size_t n_blocks_;
std::vector<int32_t> block_rpo_numbers_;
std::vector<uint32_t> counts_;
std::string function_name_;
std::string schedule_;
std::string code_;
DISALLOW_COPY_AND_ASSIGN(Data);
};
using DataList = std::list<Data*>;
using DataList = std::list<std::unique_ptr<BasicBlockProfilerData>>;
BasicBlockProfiler() = default;
~BasicBlockProfiler();
~BasicBlockProfiler() = default;
V8_EXPORT_PRIVATE static BasicBlockProfiler* Get();
Data* NewData(size_t n_blocks);
V8_EXPORT_PRIVATE void ResetCounts();
BasicBlockProfilerData* NewData(size_t n_blocks);
V8_EXPORT_PRIVATE void ResetCounts(Isolate* isolate);
V8_EXPORT_PRIVATE void Print(std::ostream& os, Isolate* isolate);
const DataList* data_list() { return &data_list_; }
private:
friend V8_EXPORT_PRIVATE std::ostream& operator<<(
std::ostream& os, const BasicBlockProfiler& s);
DataList data_list_;
base::Mutex data_list_mutex_;
DISALLOW_COPY_AND_ASSIGN(BasicBlockProfiler);
};
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os,
const BasicBlockProfiler& s);
std::ostream& operator<<(std::ostream& os, const BasicBlockProfiler::Data& s);
std::ostream& operator<<(std::ostream& os, const BasicBlockProfilerData& s);
} // namespace internal
} // namespace v8
......
......@@ -926,6 +926,8 @@ void Oddball::OddballVerify(Isolate* isolate) {
} else if (map() == roots.self_reference_marker_map()) {
// Multiple instances of this oddball may exist at once.
CHECK_EQ(kind(), Oddball::kSelfReferenceMarker);
} else if (map() == roots.basic_block_counters_marker_map()) {
CHECK(*this == roots.basic_block_counters_marker());
} else {
UNREACHABLE();
}
......
......@@ -15,6 +15,7 @@
#include "src/builtins/constants-table-builder.h"
#include "src/codegen/compiler.h"
#include "src/common/globals.h"
#include "src/diagnostics/basic-block-profiler.h"
#include "src/execution/isolate-inl.h"
#include "src/execution/protectors-inl.h"
#include "src/heap/heap-inl.h"
......@@ -118,6 +119,22 @@ MaybeHandle<Code> Factory::CodeBuilder::BuildInternal(
data_container->set_kind_specific_flags(kind_specific_flags_);
}
// Basic block profiling data for builtins is stored in the JS heap rather
// than in separately-allocated C++ objects. Allocate that data now if
// appropriate.
Handle<OnHeapBasicBlockProfilerData> on_heap_profiler_data;
if (profiler_data_ && isolate_->IsGeneratingEmbeddedBuiltins()) {
on_heap_profiler_data = profiler_data_->CopyToJSHeap(isolate_);
// Add the on-heap data to a global list, which keeps it alive and allows
// iteration.
Handle<ArrayList> list(isolate_->heap()->basic_block_profiling_data(),
isolate_);
Handle<ArrayList> new_list =
ArrayList::Add(isolate_, list, on_heap_profiler_data);
isolate_->heap()->SetBasicBlockProfilingData(new_list);
}
Handle<Code> code;
{
int object_size = ComputeCodeObjectSize(code_desc_);
......@@ -189,6 +206,14 @@ MaybeHandle<Code> Factory::CodeBuilder::BuildInternal(
*(self_reference.location()) = code->ptr();
}
// Likewise, any references to the basic block counters marker need to be
// updated to point to the newly-allocated counters array.
if (!on_heap_profiler_data.is_null()) {
isolate_->builtins_constants_table_builder()
->PatchBasicBlockCountersReference(
handle(on_heap_profiler_data->counts(), isolate_));
}
// Migrate generated code.
// The generated code can contain embedded objects (typically from handles)
// in a pointer-to-tagged-value format (i.e. with indirection like a handle)
......@@ -211,6 +236,21 @@ MaybeHandle<Code> Factory::CodeBuilder::BuildInternal(
code->FlushICache();
}
if (profiler_data_) {
#ifdef ENABLE_DISASSEMBLER
std::ostringstream os;
code->Disassemble(nullptr, os, isolate_);
if (!on_heap_profiler_data.is_null()) {
Handle<String> disassembly =
isolate_->factory()->NewStringFromAsciiChecked(os.str().c_str(),
AllocationType::kOld);
on_heap_profiler_data->set_code(*disassembly);
} else {
profiler_data_->SetCode(os);
}
#endif // ENABLE_DISASSEMBLER
}
return code;
}
......@@ -325,6 +365,13 @@ Handle<Oddball> Factory::NewSelfReferenceMarker() {
Oddball::kSelfReferenceMarker);
}
Handle<Oddball> Factory::NewBasicBlockCountersMarker() {
return NewOddball(basic_block_counters_marker_map(),
"basic_block_counters_marker",
handle(Smi::FromInt(-1), isolate()), "undefined",
Oddball::kBasicBlockCountersMarker);
}
Handle<PropertyArray> Factory::NewPropertyArray(int length) {
DCHECK_LE(0, length);
if (length == 0) return empty_property_array();
......
......@@ -26,6 +26,7 @@ namespace internal {
// Forward declarations.
class AliasedArgumentsEntry;
class ObjectBoilerplateDescription;
class BasicBlockProfilerData;
class BreakPoint;
class BreakPointInfo;
class CallableTask;
......@@ -119,6 +120,10 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
// Marks self references within code generation.
Handle<Oddball> NewSelfReferenceMarker();
// Marks references to a function's basic-block usage counters array during
// code generation.
Handle<Oddball> NewBasicBlockCountersMarker();
// Allocates a property array initialized with undefined values.
Handle<PropertyArray> NewPropertyArray(int length);
// Tries allocating a fixed array initialized with undefined values.
......@@ -861,6 +866,11 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
return *this;
}
CodeBuilder& set_profiler_data(BasicBlockProfilerData* profiler_data) {
profiler_data_ = profiler_data;
return *this;
}
private:
MaybeHandle<Code> BuildInternal(bool retry_allocation_or_fail);
......@@ -875,6 +885,7 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
Handle<ByteArray> source_position_table_;
Handle<DeoptimizationData> deoptimization_data_ =
DeoptimizationData::Empty(isolate_);
BasicBlockProfilerData* profiler_data_ = nullptr;
bool is_executable_ = true;
bool read_only_data_container_ = false;
bool is_movable_ = true;
......
......@@ -176,6 +176,10 @@ void Heap::SetSerializedGlobalProxySizes(FixedArray sizes) {
set_serialized_global_proxy_sizes(sizes);
}
void Heap::SetBasicBlockProfilingData(Handle<ArrayList> list) {
set_basic_block_profiling_data(*list);
}
bool Heap::GCCallbackTuple::operator==(
const Heap::GCCallbackTuple& other) const {
return other.callback == callback && other.data == data;
......
......@@ -663,6 +663,8 @@ class Heap {
void SetSerializedObjects(FixedArray objects);
void SetSerializedGlobalProxySizes(FixedArray sizes);
void SetBasicBlockProfilingData(Handle<ArrayList> list);
// For post mortem debugging.
void RememberUnmappedPage(Address page, bool compacted);
......
......@@ -393,6 +393,7 @@ bool Heap::CreateInitialMaps() {
ALLOCATE_MAP(ODDBALL_TYPE, Oddball::kSize, optimized_out);
ALLOCATE_MAP(ODDBALL_TYPE, Oddball::kSize, stale_register);
ALLOCATE_MAP(ODDBALL_TYPE, Oddball::kSize, self_reference_marker);
ALLOCATE_MAP(ODDBALL_TYPE, Oddball::kSize, basic_block_counters_marker);
ALLOCATE_VARSIZE_MAP(BIGINT_TYPE, bigint);
for (unsigned i = 0; i < arraysize(string_type_table); i++) {
......@@ -717,8 +718,9 @@ void Heap::CreateInitialObjects() {
handle(Smi::FromInt(-7), isolate()), "undefined",
Oddball::kStaleRegister));
// Initialize the self-reference marker.
// Initialize marker objects used during compilation.
set_self_reference_marker(*factory->NewSelfReferenceMarker());
set_basic_block_counters_marker(*factory->NewBasicBlockCountersMarker());
set_interpreter_entry_trampoline_for_profiling(roots.undefined_value());
......@@ -769,6 +771,8 @@ void Heap::CreateInitialObjects() {
set_number_string_cache(*factory->NewFixedArray(
kInitialNumberStringCacheSize * 2, AllocationType::kOld));
set_basic_block_profiling_data(ArrayList::cast(roots.empty_fixed_array()));
// Allocate cache for string split and regexp-multiple.
set_string_split_cache(*factory->NewFixedArray(
RegExpResultsCache::kRegExpResultsCacheSize, AllocationType::kOld));
......
......@@ -47,6 +47,7 @@ class Oddball : public TorqueGeneratedOddball<Oddball, PrimitiveHeapObject> {
static const byte kOptimizedOut = 9;
static const byte kStaleRegister = 10;
static const byte kSelfReferenceMarker = 10;
static const byte kBasicBlockCountersMarker = 11;
static_assert(kStartOfWeakFieldsOffset == kEndOfWeakFieldsOffset,
"Ensure BodyDescriptor does not need to handle weak fields.");
......
......@@ -77,3 +77,12 @@ extern class UncompiledDataWithoutPreparseData extends UncompiledData {
extern class UncompiledDataWithPreparseData extends UncompiledData {
preparse_data: PreparseData;
}
@export
class OnHeapBasicBlockProfilerData extends HeapObject {
block_rpo_numbers: ByteArray; // Stored as 4-byte ints
counts: ByteArray; // Stored as 4-byte ints
name: String;
schedule: String;
code: String;
}
......@@ -156,6 +156,7 @@ class Symbol;
V(Map, optimized_out_map, OptimizedOutMap) \
V(Map, stale_register_map, StaleRegisterMap) \
V(Map, self_reference_marker_map, SelfReferenceMarkerMap) \
V(Map, basic_block_counters_marker_map, BasicBlockCountersMarkerMap) \
/* Canonical empty values */ \
V(EnumCache, empty_enum_cache, EmptyEnumCache) \
V(PropertyArray, empty_property_array, EmptyPropertyArray) \
......@@ -185,6 +186,8 @@ class Symbol;
V(HeapNumber, minus_infinity_value, MinusInfinityValue) \
/* Marker for self-references during code-generation */ \
V(HeapObject, self_reference_marker, SelfReferenceMarker) \
/* Marker for basic-block usage counters array during code-generation */ \
V(Oddball, basic_block_counters_marker, BasicBlockCountersMarker) \
/* Canonical off-heap trampoline data */ \
V(ByteArray, off_heap_trampoline_relocation_info, \
OffHeapTrampolineRelocationInfo) \
......@@ -303,6 +306,7 @@ class Symbol;
InterpreterEntryTrampolineForProfiling) \
V(Object, pending_optimize_for_test_bytecode, \
PendingOptimizeForTestBytecode) \
V(ArrayList, basic_block_profiling_data, BasicBlockProfilingData) \
V(WeakArrayList, shared_wasm_memories, SharedWasmMemories)
// Entries in this list are limited to Smis and are not visited during GC.
......
......@@ -18,13 +18,15 @@ class BasicBlockProfilerTest : public RawMachineAssemblerTester<int32_t> {
FLAG_turbo_profiling = true;
}
void ResetCounts() { BasicBlockProfiler::Get()->ResetCounts(); }
void ResetCounts() {
BasicBlockProfiler::Get()->ResetCounts(CcTest::i_isolate());
}
void Expect(size_t size, uint32_t* expected) {
const BasicBlockProfiler::DataList* l =
BasicBlockProfiler::Get()->data_list();
CHECK_NE(0, static_cast<int>(l->size()));
const BasicBlockProfiler::Data* data = l->back();
const BasicBlockProfilerData* data = l->back().get();
CHECK_EQ(static_cast<int>(size), static_cast<int>(data->n_blocks()));
const uint32_t* counts = data->counts();
for (size_t i = 0; i < size; ++i) {
......
......@@ -42,6 +42,7 @@ bool IsInitiallyMutable(Factory* factory, Address object_address) {
#define INITIALLY_READ_ONLY_ROOT_LIST(V) \
V(api_private_symbol_table) \
V(api_symbol_table) \
V(basic_block_profiling_data) \
V(builtins_constants_table) \
V(current_microtask) \
V(detached_contexts) \
......