Commit 905aafab authored by Emanuel Ziegler's avatar Emanuel Ziegler Committed by V8 LUCI CQ

[wasm] Add CPU time metrics (reland)

This is a reland of dcdaf42f. It adds
CPU time metrics to the WasmModuleDecoded (except for streaming),
WasmModuleCompiled and WasmModuleTieredUp events. This can later be used
to provide this information as UKMs or UMAs.

Bug: v8:11611
Change-Id: I813fc8de36d1445c6a887abf496ec10e1a803815
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2953296Reviewed-by: 's avatarAdam Klein <adamk@chromium.org>
Reviewed-by: 's avatarClemens Backes <clemensb@chromium.org>
Commit-Queue: Emanuel Ziegler <ecmziegler@chromium.org>
Cr-Commit-Position: refs/heads/master@{#75715}
parent 3e405e3e
...@@ -78,6 +78,7 @@ struct WasmModuleDecoded { ...@@ -78,6 +78,7 @@ struct WasmModuleDecoded {
size_t module_size_in_bytes = 0; size_t module_size_in_bytes = 0;
size_t function_count = 0; size_t function_count = 0;
int64_t wall_clock_duration_in_us = -1; int64_t wall_clock_duration_in_us = -1;
int64_t cpu_duration_in_us = -1;
}; };
struct WasmModuleCompiled { struct WasmModuleCompiled {
...@@ -90,6 +91,7 @@ struct WasmModuleCompiled { ...@@ -90,6 +91,7 @@ struct WasmModuleCompiled {
size_t code_size_in_bytes = 0; size_t code_size_in_bytes = 0;
size_t liftoff_bailout_count = 0; size_t liftoff_bailout_count = 0;
int64_t wall_clock_duration_in_us = -1; int64_t wall_clock_duration_in_us = -1;
int64_t cpu_duration_in_us = -1;
}; };
struct WasmModuleInstantiated { struct WasmModuleInstantiated {
...@@ -103,6 +105,7 @@ struct WasmModuleTieredUp { ...@@ -103,6 +105,7 @@ struct WasmModuleTieredUp {
bool lazy = false; bool lazy = false;
size_t code_size_in_bytes = 0; size_t code_size_in_bytes = 0;
int64_t wall_clock_duration_in_us = -1; int64_t wall_clock_duration_in_us = -1;
int64_t cpu_duration_in_us = -1;
}; };
struct WasmModulesPerIsolate { struct WasmModulesPerIsolate {
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include "src/wasm/function-compiler.h" #include "src/wasm/function-compiler.h"
#include "src/base/platform/time.h"
#include "src/base/strings.h" #include "src/base/strings.h"
#include "src/codegen/compiler.h" #include "src/codegen/compiler.h"
#include "src/codegen/macro-assembler-inl.h" #include "src/codegen/macro-assembler-inl.h"
......
...@@ -71,6 +71,7 @@ class V8_EXPORT_PRIVATE WasmCompilationUnit final { ...@@ -71,6 +71,7 @@ class V8_EXPORT_PRIVATE WasmCompilationUnit final {
WasmFeatures* detected); WasmFeatures* detected);
ExecutionTier tier() const { return tier_; } ExecutionTier tier() const { return tier_; }
ForDebugging for_debugging() const { return for_debugging_; }
int func_index() const { return func_index_; } int func_index() const { return func_index_; }
static void CompileWasmFunction(Isolate*, NativeModule*, static void CompileWasmFunction(Isolate*, NativeModule*,
......
...@@ -1138,6 +1138,10 @@ bool CompileLazy(Isolate* isolate, Handle<WasmModuleObject> module_object, ...@@ -1138,6 +1138,10 @@ bool CompileLazy(Isolate* isolate, Handle<WasmModuleObject> module_object,
TRACE_LAZY("Compiling wasm-function#%d.\n", func_index); TRACE_LAZY("Compiling wasm-function#%d.\n", func_index);
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
? base::ThreadTicks::Now()
: base::ThreadTicks();
CompilationStateImpl* compilation_state = CompilationStateImpl* compilation_state =
Impl(native_module->compilation_state()); Impl(native_module->compilation_state());
ExecutionTierPair tiers = ExecutionTierPair tiers =
...@@ -1154,6 +1158,11 @@ bool CompileLazy(Isolate* isolate, Handle<WasmModuleObject> module_object, ...@@ -1154,6 +1158,11 @@ bool CompileLazy(Isolate* isolate, Handle<WasmModuleObject> module_object,
&env, compilation_state->GetWireBytesStorage().get(), counters, &env, compilation_state->GetWireBytesStorage().get(), counters,
&detected_features); &detected_features);
compilation_state->OnCompilationStopped(detected_features); compilation_state->OnCompilationStopped(detected_features);
if (!thread_ticks.IsNull()) {
native_module->UpdateCPUDuration(
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds(),
tiers.baseline_tier);
}
// During lazy compilation, we can only get compilation errors when // During lazy compilation, we can only get compilation errors when
// {--wasm-lazy-validation} is enabled. Otherwise, the module was fully // {--wasm-lazy-validation} is enabled. Otherwise, the module was fully
...@@ -1312,6 +1321,10 @@ CompilationExecutionResult ExecuteCompilationUnits( ...@@ -1312,6 +1321,10 @@ CompilationExecutionResult ExecuteCompilationUnits(
WasmFeatures detected_features = WasmFeatures::None(); WasmFeatures detected_features = WasmFeatures::None();
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
? base::ThreadTicks::Now()
: base::ThreadTicks();
// Preparation (synchronized): Initialize the fields above and get the first // Preparation (synchronized): Initialize the fields above and get the first
// compilation unit. // compilation unit.
{ {
...@@ -1349,10 +1362,19 @@ CompilationExecutionResult ExecuteCompilationUnits( ...@@ -1349,10 +1362,19 @@ CompilationExecutionResult ExecuteCompilationUnits(
return kNoMoreUnits; return kNoMoreUnits;
} }
if (!unit->for_debugging() && result.result_tier != current_tier) {
compile_scope.native_module()->AddLiftoffBailout();
}
// Yield or get next unit. // Yield or get next unit.
if (yield || if (yield ||
!(unit = compile_scope.compilation_state()->GetNextCompilationUnit( !(unit = compile_scope.compilation_state()->GetNextCompilationUnit(
queue, baseline_only))) { queue, baseline_only))) {
if (!thread_ticks.IsNull()) {
compile_scope.native_module()->UpdateCPUDuration(
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds(),
current_tier);
}
std::vector<std::unique_ptr<WasmCode>> unpublished_code = std::vector<std::unique_ptr<WasmCode>> unpublished_code =
compile_scope.native_module()->AddCompiledCode( compile_scope.native_module()->AddCompiledCode(
base::VectorOf(std::move(results_to_publish))); base::VectorOf(std::move(results_to_publish)));
...@@ -1374,6 +1396,12 @@ CompilationExecutionResult ExecuteCompilationUnits( ...@@ -1374,6 +1396,12 @@ CompilationExecutionResult ExecuteCompilationUnits(
bool liftoff_finished = unit->tier() != current_tier && bool liftoff_finished = unit->tier() != current_tier &&
unit->tier() == ExecutionTier::kTurbofan; unit->tier() == ExecutionTier::kTurbofan;
if (batch_full || liftoff_finished) { if (batch_full || liftoff_finished) {
if (!thread_ticks.IsNull()) {
base::ThreadTicks thread_ticks_now = base::ThreadTicks::Now();
compile_scope.native_module()->UpdateCPUDuration(
(thread_ticks_now - thread_ticks).InMicroseconds(), current_tier);
thread_ticks = thread_ticks_now;
}
std::vector<std::unique_ptr<WasmCode>> unpublished_code = std::vector<std::unique_ptr<WasmCode>> unpublished_code =
compile_scope.native_module()->AddCompiledCode( compile_scope.native_module()->AddCompiledCode(
base::VectorOf(std::move(results_to_publish))); base::VectorOf(std::move(results_to_publish)));
...@@ -1519,7 +1547,6 @@ class CompilationTimeCallback { ...@@ -1519,7 +1547,6 @@ class CompilationTimeCallback {
histogram->AddSample(static_cast<int>(duration.InMicroseconds())); histogram->AddSample(static_cast<int>(duration.InMicroseconds()));
} }
// TODO(sartang@microsoft.com): Remove wall_clock_time_in_us field
v8::metrics::WasmModuleCompiled event{ v8::metrics::WasmModuleCompiled event{
(compile_mode_ != kSynchronous), // async (compile_mode_ != kSynchronous), // async
(compile_mode_ == kStreaming), // streamed (compile_mode_ == kStreaming), // streamed
...@@ -1529,8 +1556,9 @@ class CompilationTimeCallback { ...@@ -1529,8 +1556,9 @@ class CompilationTimeCallback {
true, // success true, // success
native_module->liftoff_code_size(), // code_size_in_bytes native_module->liftoff_code_size(), // code_size_in_bytes
native_module->liftoff_bailout_count(), // liftoff_bailout_count native_module->liftoff_bailout_count(), // liftoff_bailout_count
duration.InMicroseconds() // wall_clock_duration_in_us duration.InMicroseconds(), // wall_clock_duration_in_us
}; static_cast<int64_t>( // cpu_time_duration_in_us
native_module->baseline_compilation_cpu_duration())};
metrics_recorder_->DelayMainThreadEvent(event, context_id_); metrics_recorder_->DelayMainThreadEvent(event, context_id_);
} }
if (event == CompilationEvent::kFinishedTopTierCompilation) { if (event == CompilationEvent::kFinishedTopTierCompilation) {
...@@ -1540,8 +1568,9 @@ class CompilationTimeCallback { ...@@ -1540,8 +1568,9 @@ class CompilationTimeCallback {
v8::metrics::WasmModuleTieredUp event{ v8::metrics::WasmModuleTieredUp event{
FLAG_wasm_lazy_compilation, // lazy FLAG_wasm_lazy_compilation, // lazy
native_module->turbofan_code_size(), // code_size_in_bytes native_module->turbofan_code_size(), // code_size_in_bytes
duration.InMicroseconds() // wall_clock_duration_in_us duration.InMicroseconds(), // wall_clock_duration_in_us
}; static_cast<int64_t>( // cpu_time_duration_in_us
native_module->tier_up_cpu_duration())};
metrics_recorder_->DelayMainThreadEvent(event, context_id_); metrics_recorder_->DelayMainThreadEvent(event, context_id_);
} }
if (event == CompilationEvent::kFailedCompilation) { if (event == CompilationEvent::kFailedCompilation) {
...@@ -1554,8 +1583,9 @@ class CompilationTimeCallback { ...@@ -1554,8 +1583,9 @@ class CompilationTimeCallback {
false, // success false, // success
native_module->liftoff_code_size(), // code_size_in_bytes native_module->liftoff_code_size(), // code_size_in_bytes
native_module->liftoff_bailout_count(), // liftoff_bailout_count native_module->liftoff_bailout_count(), // liftoff_bailout_count
duration.InMicroseconds() // wall_clock_duration_in_us duration.InMicroseconds(), // wall_clock_duration_in_us
}; static_cast<int64_t>( // cpu_time_duration_in_us
native_module->baseline_compilation_cpu_duration())};
metrics_recorder_->DelayMainThreadEvent(event, context_id_); metrics_recorder_->DelayMainThreadEvent(event, context_id_);
} }
} }
...@@ -1954,10 +1984,11 @@ void AsyncCompileJob::FinishCompile(bool is_after_cache_hit) { ...@@ -1954,10 +1984,11 @@ void AsyncCompileJob::FinishCompile(bool is_after_cache_hit) {
is_after_deserialization, // deserialized is_after_deserialization, // deserialized
wasm_lazy_compilation_, // lazy wasm_lazy_compilation_, // lazy
!compilation_state->failed(), // success !compilation_state->failed(), // success
native_module_->liftoff_code_size(), // code_size_in_bytes native_module_->turbofan_code_size(), // code_size_in_bytes
native_module_->liftoff_bailout_count(), // liftoff_bailout_count native_module_->liftoff_bailout_count(), // liftoff_bailout_count
duration.InMicroseconds() // wall_clock_duration_in_us duration.InMicroseconds(), // wall_clock_duration_in_us
}; static_cast<int64_t>( // cpu_time_duration_in_us
native_module_->baseline_compilation_cpu_duration())};
isolate_->metrics_recorder()->DelayMainThreadEvent(event, context_id_); isolate_->metrics_recorder()->DelayMainThreadEvent(event, context_id_);
} }
} }
......
...@@ -2068,12 +2068,19 @@ ModuleResult DecodeWasmModule( ...@@ -2068,12 +2068,19 @@ ModuleResult DecodeWasmModule(
v8::metrics::WasmModuleDecoded metrics_event; v8::metrics::WasmModuleDecoded metrics_event;
base::ElapsedTimer timer; base::ElapsedTimer timer;
timer.Start(); timer.Start();
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
? base::ThreadTicks::Now()
: base::ThreadTicks();
ModuleResult result = ModuleResult result =
decoder.DecodeModule(counters, allocator, verify_functions); decoder.DecodeModule(counters, allocator, verify_functions);
// Record event metrics. // Record event metrics.
metrics_event.wall_clock_duration_in_us = timer.Elapsed().InMicroseconds(); metrics_event.wall_clock_duration_in_us = timer.Elapsed().InMicroseconds();
timer.Stop(); timer.Stop();
if (!thread_ticks.IsNull()) {
metrics_event.cpu_duration_in_us =
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds();
}
metrics_event.success = decoder.ok() && result.ok(); metrics_event.success = decoder.ok() && result.ok();
metrics_event.async = decoding_method == DecodingMethod::kAsync || metrics_event.async = decoding_method == DecodingMethod::kAsync ||
decoding_method == DecodingMethod::kAsyncStream; decoding_method == DecodingMethod::kAsyncStream;
......
...@@ -1234,10 +1234,6 @@ WasmCode* NativeModule::PublishCodeLocked( ...@@ -1234,10 +1234,6 @@ WasmCode* NativeModule::PublishCodeLocked(
// {WasmCodeRefScope} though, so it cannot die here. // {WasmCodeRefScope} though, so it cannot die here.
code->DecRefOnLiveCode(); code->DecRefOnLiveCode();
} }
if (!code->for_debugging() && tiering_state_ == kTieredDown &&
code->tier() == ExecutionTier::kTurbofan) {
liftoff_bailout_count_.fetch_add(1);
}
return code; return code;
} }
...@@ -1535,6 +1531,20 @@ void NativeModule::SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes) { ...@@ -1535,6 +1531,20 @@ void NativeModule::SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes) {
} }
} }
void NativeModule::UpdateCPUDuration(size_t cpu_duration, ExecutionTier tier) {
if (tier == WasmCompilationUnit::GetBaselineExecutionTier(this->module())) {
if (!compilation_state_->baseline_compilation_finished()) {
baseline_compilation_cpu_duration_.fetch_add(
cpu_duration, std::memory_order::memory_order_relaxed);
}
} else if (tier == ExecutionTier::kTurbofan) {
if (!compilation_state_->top_tier_compilation_finished()) {
tier_up_cpu_duration_.fetch_add(cpu_duration,
std::memory_order::memory_order_relaxed);
}
}
}
void NativeModule::TransferNewOwnedCodeLocked() const { void NativeModule::TransferNewOwnedCodeLocked() const {
allocation_mutex_.AssertHeld(); allocation_mutex_.AssertHeld();
DCHECK(!new_owned_code_.empty()); DCHECK(!new_owned_code_.empty());
......
...@@ -698,6 +698,10 @@ class V8_EXPORT_PRIVATE NativeModule final { ...@@ -698,6 +698,10 @@ class V8_EXPORT_PRIVATE NativeModule final {
size_t liftoff_bailout_count() const { return liftoff_bailout_count_.load(); } size_t liftoff_bailout_count() const { return liftoff_bailout_count_.load(); }
size_t liftoff_code_size() const { return liftoff_code_size_.load(); } size_t liftoff_code_size() const { return liftoff_code_size_.load(); }
size_t turbofan_code_size() const { return turbofan_code_size_.load(); } size_t turbofan_code_size() const { return turbofan_code_size_.load(); }
size_t baseline_compilation_cpu_duration() const {
return baseline_compilation_cpu_duration_.load();
}
size_t tier_up_cpu_duration() const { return tier_up_cpu_duration_.load(); }
bool HasWireBytes() const { bool HasWireBytes() const {
auto wire_bytes = std::atomic_load(&wire_bytes_); auto wire_bytes = std::atomic_load(&wire_bytes_);
...@@ -705,6 +709,12 @@ class V8_EXPORT_PRIVATE NativeModule final { ...@@ -705,6 +709,12 @@ class V8_EXPORT_PRIVATE NativeModule final {
} }
void SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes); void SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes);
void UpdateCPUDuration(size_t cpu_duration, ExecutionTier tier);
void AddLiftoffBailout() {
liftoff_bailout_count_.fetch_add(1,
std::memory_order::memory_order_relaxed);
}
WasmCode* Lookup(Address) const; WasmCode* Lookup(Address) const;
WasmImportWrapperCache* import_wrapper_cache() const { WasmImportWrapperCache* import_wrapper_cache() const {
...@@ -919,6 +929,8 @@ class V8_EXPORT_PRIVATE NativeModule final { ...@@ -919,6 +929,8 @@ class V8_EXPORT_PRIVATE NativeModule final {
std::atomic<size_t> liftoff_bailout_count_{0}; std::atomic<size_t> liftoff_bailout_count_{0};
std::atomic<size_t> liftoff_code_size_{0}; std::atomic<size_t> liftoff_code_size_{0};
std::atomic<size_t> turbofan_code_size_{0}; std::atomic<size_t> turbofan_code_size_{0};
std::atomic<size_t> baseline_compilation_cpu_duration_{0};
std::atomic<size_t> tier_up_cpu_duration_{0};
}; };
class V8_EXPORT_PRIVATE WasmCodeManager final { class V8_EXPORT_PRIVATE WasmCodeManager final {
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "include/libplatform/libplatform.h" #include "include/libplatform/libplatform.h"
#include "include/v8-metrics.h" #include "include/v8-metrics.h"
#include "src/api/api-inl.h" #include "src/api/api-inl.h"
#include "src/base/platform/time.h"
#include "src/wasm/wasm-engine.h" #include "src/wasm/wasm-engine.h"
#include "src/wasm/wasm-module-builder.h" #include "src/wasm/wasm-module-builder.h"
#include "test/cctest/cctest.h" #include "test/cctest/cctest.h"
...@@ -271,6 +272,9 @@ COMPILE_TEST(TestEventMetrics) { ...@@ -271,6 +272,9 @@ COMPILE_TEST(TestEventMetrics) {
std::shared_ptr<MetricsRecorder> recorder = std::shared_ptr<MetricsRecorder> recorder =
std::make_shared<MetricsRecorder>(); std::make_shared<MetricsRecorder>();
reinterpret_cast<v8::Isolate*>(isolate)->SetMetricsRecorder(recorder); reinterpret_cast<v8::Isolate*>(isolate)->SetMetricsRecorder(recorder);
if (v8::base::ThreadTicks::IsSupported()) {
v8::base::ThreadTicks::WaitUntilInitialized();
}
TestSignatures sigs; TestSignatures sigs;
v8::internal::AccountingAllocator allocator; v8::internal::AccountingAllocator allocator;
...@@ -313,6 +317,9 @@ COMPILE_TEST(TestEventMetrics) { ...@@ -313,6 +317,9 @@ COMPILE_TEST(TestEventMetrics) {
recorder->module_decoded_.back().module_size_in_bytes); recorder->module_decoded_.back().module_size_in_bytes);
CHECK_EQ(1, recorder->module_decoded_.back().function_count); CHECK_EQ(1, recorder->module_decoded_.back().function_count);
CHECK_LE(0, recorder->module_decoded_.back().wall_clock_duration_in_us); CHECK_LE(0, recorder->module_decoded_.back().wall_clock_duration_in_us);
CHECK_IMPLIES(
v8::base::ThreadTicks::IsSupported() && !i::FLAG_wasm_test_streaming,
recorder->module_decoded_.back().cpu_duration_in_us > 0);
CHECK_EQ(1, recorder->module_compiled_.size()); CHECK_EQ(1, recorder->module_compiled_.size());
CHECK(recorder->module_compiled_.back().success); CHECK(recorder->module_compiled_.back().success);
...@@ -331,8 +338,12 @@ COMPILE_TEST(TestEventMetrics) { ...@@ -331,8 +338,12 @@ COMPILE_TEST(TestEventMetrics) {
recorder->module_compiled_.back().code_size_in_bytes); recorder->module_compiled_.back().code_size_in_bytes);
CHECK_GE(native_module->generated_code_size(), CHECK_GE(native_module->generated_code_size(),
recorder->module_compiled_.back().code_size_in_bytes); recorder->module_compiled_.back().code_size_in_bytes);
CHECK_EQ(0, recorder->module_compiled_.back().liftoff_bailout_count);
CHECK_LE(0, recorder->module_compiled_.back().wall_clock_duration_in_us); CHECK_LE(0, recorder->module_compiled_.back().wall_clock_duration_in_us);
CHECK_EQ(native_module->baseline_compilation_cpu_duration(),
recorder->module_compiled_.back().cpu_duration_in_us);
CHECK_IMPLIES(
v8::base::ThreadTicks::IsSupported() && !i::FLAG_wasm_test_streaming,
recorder->module_compiled_.back().cpu_duration_in_us > 0);
CHECK_EQ(1, recorder->module_instantiated_.size()); CHECK_EQ(1, recorder->module_instantiated_.size());
CHECK(recorder->module_instantiated_.back().success); CHECK(recorder->module_instantiated_.back().success);
...@@ -351,6 +362,13 @@ COMPILE_TEST(TestEventMetrics) { ...@@ -351,6 +362,13 @@ COMPILE_TEST(TestEventMetrics) {
CHECK_GE(native_module->committed_code_space(), CHECK_GE(native_module->committed_code_space(),
recorder->module_tiered_up_.back().code_size_in_bytes); recorder->module_tiered_up_.back().code_size_in_bytes);
CHECK_LE(0, recorder->module_tiered_up_.back().wall_clock_duration_in_us); CHECK_LE(0, recorder->module_tiered_up_.back().wall_clock_duration_in_us);
CHECK_EQ(native_module->tier_up_cpu_duration(),
recorder->module_tiered_up_.back().cpu_duration_in_us);
CHECK_IMPLIES(
v8::base::ThreadTicks::IsSupported() && i::FLAG_wasm_tier_up &&
i::FLAG_liftoff &&
recorder->module_compiled_.back().liftoff_bailout_count == 0,
recorder->module_tiered_up_.back().cpu_duration_in_us > 0);
} }
} // namespace wasm } // namespace wasm
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment