Commit 4976642b authored by Jakob Kummerow, committed by V8 LUCI CQ

[wasm] Switch dynamic tiering to budget-based approach

FLAG_wasm_dynamic_tiering is still off by default. When enabled,
it now uses the technique previously behind --new-wasm-dynamic-tiering.

Bug: v8:12281
Change-Id: I365c2c066e62418cd6abc7830f95d1fe0f950e33
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3275570
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77883}
parent 79f617b0
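
For orientation, here is a minimal self-contained sketch of the budget-based approach this change adopts. All names below are illustrative assumptions, not V8's actual code: each declared function starts with a budget seeded from --wasm-tiering-budget, generated Liftoff code charges that budget with a rough estimate of bytes executed, and a runtime call requests optimized compilation once the budget is spent.

#include <cstdint>
#include <vector>

// Illustrative sketch only; hypothetical names, not V8's implementation.
constexpr int32_t kInitialTieringBudget = 1800000;  // mirrors the flag default

struct ModuleTieringState {
  std::vector<int32_t> budgets;  // one 32-bit slot per declared function
  explicit ModuleTieringState(size_t num_declared_functions)
      : budgets(num_declared_functions, kInitialTieringBudget) {}
};

// Conceptually invoked from generated code at function exits and loop
// back-edges; `cost` approximates the bytes of code just executed.
void ConsumeBudget(ModuleTieringState& state, int declared_index,
                   int32_t cost) {
  state.budgets[declared_index] -= cost;
  if (state.budgets[declared_index] < 0) {
    // In the real code this is a call into the runtime
    // (Runtime_WasmTriggerTierUp), which schedules a top-tier compile.
    state.budgets[declared_index] = kInitialTieringBudget;  // reset, carry on
  }
}
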
......@@ -987,10 +987,6 @@ DEFINE_BOOL(wasm_tier_up, true,
"have an effect)")
DEFINE_BOOL(wasm_dynamic_tiering, false,
"enable dynamic tier up to the optimizing compiler")
DEFINE_BOOL(new_wasm_dynamic_tiering, false, "dynamic tier up (new impl)")
// For dynamic tiering to have an effect, we have to turn off eager tierup.
// This is handled in module-compiler.cc for --wasm-dynamic-tiering.
DEFINE_NEG_IMPLICATION(new_wasm_dynamic_tiering, wasm_tier_up)
DEFINE_INT(wasm_tiering_budget, 1800000,
"budget for dynamic tiering (rough approximation of bytes executed")
DEFINE_INT(
......
......@@ -299,14 +299,12 @@ RUNTIME_FUNCTION(Runtime_WasmTriggerTierUp) {
DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(WasmInstanceObject, instance, 0);
if (FLAG_new_wasm_dynamic_tiering) {
// We're reusing this interrupt mechanism to interrupt long-running loops.
StackLimitCheck check(isolate);
DCHECK(!check.JsHasOverflowed());
if (check.InterruptRequested()) {
Object result = isolate->stack_guard()->HandleInterrupts();
if (result.IsException()) return result;
}
// We're reusing this interrupt mechanism to interrupt long-running loops.
StackLimitCheck check(isolate);
DCHECK(!check.JsHasOverflowed());
if (check.InterruptRequested()) {
Object result = isolate->stack_guard()->HandleInterrupts();
if (result.IsException()) return result;
}
FrameFinder<WasmFrame> frame_finder(isolate);
......
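
Because the budget mechanism replaces the per-iteration stack check in loops (see the loop-header hunk further down), this runtime entry doubles as the interrupt check for long-running loops, which is why the StackLimitCheck now runs unconditionally. A self-contained sketch of that pattern, with assumed helper names rather than V8 APIs:

enum class InterruptResult { kNone, kException };

// Stub stand-ins for the sketch; not V8 functions.
InterruptResult HandlePendingInterrupts() { return InterruptResult::kNone; }
void ScheduleTopTierCompile(int /*func_index*/) {}

bool OnTieringBudgetExhausted(int func_index) {
  // Long-running loops reach this slow path periodically, so service any
  // pending interrupt request first (the diff reuses the stack guard).
  if (HandlePendingInterrupts() == InterruptResult::kException) {
    return false;  // propagate the exception instead of tiering up
  }
  ScheduleTopTierCompile(func_index);  // then request optimized code
  return true;
}
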
......@@ -785,6 +785,13 @@ class LiftoffCompiler {
DefineSafepoint();
}
bool dynamic_tiering() {
return env_->dynamic_tiering == DynamicTiering::kEnabled &&
for_debugging_ == kNoDebugging &&
(FLAG_wasm_tier_up_filter == -1 ||
FLAG_wasm_tier_up_filter == func_index_);
}
void StartFunctionBody(FullDecoder* decoder, Control* block) {
for (uint32_t i = 0; i < __ num_locals(); ++i) {
if (!CheckSupportedType(decoder, __ local_kind(i), "param")) return;
......@@ -834,11 +841,11 @@ class LiftoffCompiler {
} else {
__ Spill(liftoff::kFeedbackVectorOffset, WasmValue::ForUintPtr(0));
}
if (FLAG_new_wasm_dynamic_tiering) {
if (dynamic_tiering()) {
LiftoffRegList pinned = parameter_registers;
LiftoffRegister tmp = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(tmp.gp(), NumLiftoffFunctionCallsArray,
kSystemPointerSize, pinned);
LOAD_INSTANCE_FIELD(tmp.gp(), TieringBudgetArray, kSystemPointerSize,
pinned);
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
__ Load(tmp, tmp.gp(), no_reg, offset, LoadType::kI32Load, pinned);
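
The load above indexes a flat uint32 array by declared function index; the truncated continuation presumably spills the loaded budget into the stack slot (liftoff::kTierupBudgetOffset) that TierupCheckOnExit reads back below. The addressing itself, as a plain sketch with assumed inputs:

#include <cstdint>

constexpr int kInt32Size = 4;

// In V8, declared_function_index subtracts the imported-function count;
// treated here as an assumption for the sketch.
int declared_function_index(int func_index, int num_imported_functions) {
  return func_index - num_imported_functions;
}

// Byte offset of a function's budget slot within the tiering budget array.
uint32_t BudgetSlotOffset(int func_index, int num_imported_functions) {
  return kInt32Size *
         declared_function_index(func_index, num_imported_functions);
}
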
......@@ -904,49 +911,6 @@ class LiftoffCompiler {
// is never a position of any instruction in the function.
StackCheck(decoder, 0);
if (env_->dynamic_tiering == DynamicTiering::kEnabled &&
for_debugging_ == kNoDebugging) {
// TODO(arobin): Avoid spilling registers unconditionally.
__ SpillAllRegisters();
CODE_COMMENT("dynamic tiering");
LiftoffRegList pinned;
// Get the number of calls array address.
LiftoffRegister array_address =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(array_address.gp(), NumLiftoffFunctionCallsArray,
kSystemPointerSize, pinned);
// Compute the correct offset in the array.
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
// Get the number of calls and update it.
LiftoffRegister old_number_of_calls =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LiftoffRegister new_number_of_calls =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
__ Load(old_number_of_calls, array_address.gp(), no_reg, offset,
LoadType::kI32Load, pinned);
__ emit_i32_addi(new_number_of_calls.gp(), old_number_of_calls.gp(), 1);
__ Store(array_address.gp(), no_reg, offset, new_number_of_calls,
StoreType::kI32Store, pinned);
// Emit the runtime call if necessary.
Label no_tierup;
// Check if the number of calls is a power of 2.
__ emit_i32_and(old_number_of_calls.gp(), old_number_of_calls.gp(),
new_number_of_calls.gp());
__ emit_cond_jump(kNotEqualZero, &no_tierup, kI32,
old_number_of_calls.gp());
TierUpFunction(decoder);
// After the runtime call, the instance cache register is clobbered (we
// reset it already in {SpillAllRegisters} above, but then we still access
// the instance afterwards).
__ cache_state()->ClearCachedInstanceRegister();
__ bind(&no_tierup);
}
if (FLAG_trace_wasm) TraceFunctionEntry(decoder);
}
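
The block deleted here was the previous scheme: each function entry incremented a per-function call counter and attempted tier-up whenever the new count reached a power of two, detected by old & new == 0 (old is all one-bits exactly when old + 1 is a power of two). Extracted as a standalone check:

#include <cstdint>

// The deleted code's trigger condition, isolated for clarity: with
// new_count = old_count + 1, (old_count & new_count) == 0 exactly when
// new_count is a power of two (or wraps to zero).
bool ShouldAttemptTierUp(uint32_t old_count) {
  uint32_t new_count = old_count + 1;
  return (old_count & new_count) == 0;
}
// e.g. counts 1, 2, 4, 8, ... trigger; counts 3, 5, 6, 7, ... do not.
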
......@@ -1215,7 +1179,7 @@ class LiftoffCompiler {
PushControl(loop);
if (!FLAG_new_wasm_dynamic_tiering) {
if (!dynamic_tiering()) {
// When the budget-based tiering mechanism is enabled, use that to
// check for interrupt requests; otherwise execute a stack check in the
// loop header.
......@@ -2271,13 +2235,13 @@ class LiftoffCompiler {
}
void TierupCheckOnExit(FullDecoder* decoder) {
if (!FLAG_new_wasm_dynamic_tiering) return;
if (!dynamic_tiering()) return;
TierupCheck(decoder, decoder->position(), __ pc_offset());
LiftoffRegList pinned;
LiftoffRegister budget = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LiftoffRegister array = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(array.gp(), NumLiftoffFunctionCallsArray,
kSystemPointerSize, pinned);
LOAD_INSTANCE_FIELD(array.gp(), TieringBudgetArray, kSystemPointerSize,
pinned);
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
__ Fill(budget, liftoff::kTierupBudgetOffset, ValueKind::kI32);
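
TierupCheck itself is outside this excerpt; based on the surrounding code it presumably subtracts an estimate of the code bytes just executed, here the function's pc_offset() at the return point, from the spilled budget and takes the runtime slow path when the result goes negative. As a conceptual sketch with assumed semantics:

#include <cstdint>

// Assumed shape of the emitted budget check, not the actual Liftoff code.
struct TierupState {
  int32_t budget;  // value kept in the dedicated stack slot
};

// budget_used: pc_offset at a function exit, jump distance at a back-edge.
bool NeedsTierUpRuntimeCall(TierupState& state, int32_t budget_used) {
  state.budget -= budget_used;  // charge the code just executed
  return state.budget < 0;      // slow path -> Runtime_WasmTriggerTierUp
}
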
......@@ -2623,7 +2587,7 @@ class LiftoffCompiler {
*__ cache_state(), __ num_locals(), target->br_merge()->arity,
target->stack_depth + target->num_exceptions);
}
if (FLAG_new_wasm_dynamic_tiering) {
if (dynamic_tiering()) {
if (target->is_loop()) {
DCHECK(target->label.get()->is_bound());
int jump_distance = __ pc_offset() - target->label.get()->pos();
......
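
For a backward branch to a loop header, the cost charged against the budget is the jump distance: the current pc_offset() minus the bound loop label's position, i.e. roughly the machine-code size of one loop iteration. In sketch form, under the same assumption as above:

// Assumed back-edge cost model: code size of one loop iteration.
int BackEdgeBudgetCost(int current_pc_offset, int loop_label_pos) {
  return current_pc_offset - loop_label_pos;  // the jump_distance above
}
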
......@@ -1327,7 +1327,7 @@ void TriggerTierUp(Isolate* isolate, NativeModule* native_module,
const WasmModule* module = native_module->module();
size_t priority;
if (FLAG_new_wasm_dynamic_tiering) {
{
base::MutexGuard mutex_guard(&module->type_feedback.mutex);
int saved_priority =
module->type_feedback.feedback_for_function[func_index].tierup_priority;
......@@ -1353,11 +1353,6 @@ void TriggerTierUp(Isolate* isolate, NativeModule* native_module,
std::move(feedback);
}
if (!FLAG_new_wasm_dynamic_tiering) {
uint32_t* call_array = native_module->num_liftoff_function_calls_array();
int offset = wasm::declared_function_index(module, func_index);
priority = base::Relaxed_Load(reinterpret_cast<int*>(&call_array[offset]));
}
compilation_state->AddTopTierPriorityCompilationUnit(tiering_unit, priority);
}
......
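
The visible lines suggest that each budget exhaustion bumps a per-function tierup_priority under the type-feedback mutex, and that this value feeds AddTopTierPriorityCompilationUnit so persistently hot functions rise in the compile queue. The middle of the function is truncated, so the following is a rough assumed sketch, not V8's exact logic:

#include <cstddef>
#include <mutex>
#include <unordered_map>

// Hypothetical structure, for illustration only.
struct TypeFeedbackStorage {
  std::mutex mutex;
  std::unordered_map<int, int> tierup_priority;  // func_index -> priority
};

size_t NextTierUpPriority(TypeFeedbackStorage& feedback, int func_index) {
  std::lock_guard<std::mutex> guard(feedback.mutex);
  // Each repeated trigger raises the priority, letting the scheduler
  // prefer functions that keep exhausting their budget.
  return static_cast<size_t>(++feedback.tierup_priority[func_index]);
}
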
......@@ -75,8 +75,8 @@ WasmCode* CompileImportWrapper(
// also lazy.
bool CompileLazy(Isolate*, Handle<WasmInstanceObject>, int func_index);
void TriggerTierUp(Isolate*, NativeModule*, int func_index,
Handle<WasmInstanceObject> instance);
V8_EXPORT_PRIVATE void TriggerTierUp(Isolate*, NativeModule*, int func_index,
Handle<WasmInstanceObject> instance);
template <typename Key, typename Hash>
class WrapperQueue {
......
......@@ -997,18 +997,11 @@ NativeModule::NativeModule(const WasmFeatures& enabled,
if (module_->num_declared_functions > 0) {
code_table_ =
std::make_unique<WasmCode*[]>(module_->num_declared_functions);
num_liftoff_function_calls_ =
tiering_budgets_ =
std::make_unique<uint32_t[]>(module_->num_declared_functions);
if (FLAG_new_wasm_dynamic_tiering) {
std::fill_n(num_liftoff_function_calls_.get(),
module_->num_declared_functions, FLAG_wasm_tiering_budget);
} else {
// Start counter at 4 to avoid runtime calls for smaller numbers.
constexpr int kCounterStart = 4;
std::fill_n(num_liftoff_function_calls_.get(),
module_->num_declared_functions, kCounterStart);
}
std::fill_n(tiering_budgets_.get(), module_->num_declared_functions,
FLAG_wasm_tiering_budget);
}
// Even though there cannot be another thread using this object (since we are
// just constructing it), we need to hold the mutex to fulfill the
......
......@@ -839,9 +839,7 @@ class V8_EXPORT_PRIVATE NativeModule final {
// Get or create the debug info for this NativeModule.
DebugInfo* GetDebugInfo();
uint32_t* num_liftoff_function_calls_array() {
return num_liftoff_function_calls_.get();
}
uint32_t* tiering_budget_array() { return tiering_budgets_.get(); }
private:
friend class WasmCode;
......@@ -944,7 +942,7 @@ class V8_EXPORT_PRIVATE NativeModule final {
std::unique_ptr<WasmImportWrapperCache> import_wrapper_cache_;
// Array to handle number of function calls.
std::unique_ptr<uint32_t[]> num_liftoff_function_calls_;
std::unique_ptr<uint32_t[]> tiering_budgets_;
// This mutex protects concurrent calls to {AddCode} and friends.
// TODO(dlehmann): Revert this to a regular {Mutex} again.
......
......@@ -225,8 +225,8 @@ PRIMITIVE_ACCESSORS(WasmInstanceObject, dropped_elem_segments, byte*,
kDroppedElemSegmentsOffset)
PRIMITIVE_ACCESSORS(WasmInstanceObject, hook_on_function_call_address, Address,
kHookOnFunctionCallAddressOffset)
PRIMITIVE_ACCESSORS(WasmInstanceObject, num_liftoff_function_calls_array,
uint32_t*, kNumLiftoffFunctionCallsArrayOffset)
PRIMITIVE_ACCESSORS(WasmInstanceObject, tiering_budget_array, uint32_t*,
kTieringBudgetArrayOffset)
PRIMITIVE_ACCESSORS(WasmInstanceObject, break_on_entry, uint8_t,
kBreakOnEntryOffset)
......
......@@ -1204,8 +1204,8 @@ Handle<WasmInstanceObject> WasmInstanceObject::New(
isolate->debug()->hook_on_function_call_address());
instance->set_managed_object_maps(*isolate->factory()->empty_fixed_array());
instance->set_feedback_vectors(*isolate->factory()->empty_fixed_array());
instance->set_num_liftoff_function_calls_array(
module_object->native_module()->num_liftoff_function_calls_array());
instance->set_tiering_budget_array(
module_object->native_module()->tiering_budget_array());
instance->set_break_on_entry(module_object->script().break_on_entry());
// Insert the new instance into the scripts weak list of instances. This list
......
......@@ -352,7 +352,7 @@ class V8_EXPORT_PRIVATE WasmInstanceObject : public JSObject {
DECL_PRIMITIVE_ACCESSORS(data_segment_sizes, uint32_t*)
DECL_PRIMITIVE_ACCESSORS(dropped_elem_segments, byte*)
DECL_PRIMITIVE_ACCESSORS(hook_on_function_call_address, Address)
DECL_PRIMITIVE_ACCESSORS(num_liftoff_function_calls_array, uint32_t*)
DECL_PRIMITIVE_ACCESSORS(tiering_budget_array, uint32_t*)
DECL_PRIMITIVE_ACCESSORS(break_on_entry, uint8_t)
// Clear uninitialized padding space. This ensures that the snapshot content
......@@ -393,7 +393,7 @@ class V8_EXPORT_PRIVATE WasmInstanceObject : public JSObject {
V(kDataSegmentSizesOffset, kSystemPointerSize) \
V(kDroppedElemSegmentsOffset, kSystemPointerSize) \
V(kHookOnFunctionCallAddressOffset, kSystemPointerSize) \
V(kNumLiftoffFunctionCallsArrayOffset, kSystemPointerSize) \
V(kTieringBudgetArrayOffset, kSystemPointerSize) \
/* Less than system pointer size aligned fields are below. */ \
V(kModuleObjectOffset, kTaggedSize) \
V(kExportsObjectOffset, kTaggedSize) \
......
......@@ -8,6 +8,7 @@
#include "src/init/v8.h"
#include "src/objects/managed.h"
#include "src/objects/objects-inl.h"
#include "src/wasm/module-compiler.h"
#include "src/wasm/module-decoder.h"
#include "src/wasm/streaming-decoder.h"
#include "src/wasm/wasm-engine.h"
......@@ -1176,13 +1177,7 @@ STREAM_TEST(TestIncrementalCaching) {
CHECK(tester.native_module()->GetCode(2)->is_liftoff());
// No TurboFan compilation happened yet, and therefore no call to the cache.
CHECK_EQ(0, call_cache_counter);
bool exception = false;
// The tier-up threshold is hard-coded right now.
constexpr int tier_up_threshold = 4;
for (int i = 0; i < tier_up_threshold; ++i) {
testing::CallWasmFunctionForTesting(i_isolate, instance, "f0", 0, nullptr,
&exception);
}
i::wasm::TriggerTierUp(i_isolate, tester.native_module().get(), 0, instance);
tester.RunCompilerTasks();
CHECK(!tester.native_module()->GetCode(0)->is_liftoff());
CHECK(tester.native_module()->GetCode(1)->is_liftoff());
......@@ -1193,10 +1188,7 @@ STREAM_TEST(TestIncrementalCaching) {
i::wasm::WasmSerializer serializer(tester.native_module().get());
serialized_size = serializer.GetSerializedNativeModuleSize();
}
for (int i = 0; i < tier_up_threshold; ++i) {
testing::CallWasmFunctionForTesting(i_isolate, instance, "f1", 0, nullptr,
&exception);
}
i::wasm::TriggerTierUp(i_isolate, tester.native_module().get(), 1, instance);
tester.RunCompilerTasks();
CHECK(!tester.native_module()->GetCode(0)->is_liftoff());
CHECK(!tester.native_module()->GetCode(1)->is_liftoff());
......
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --wasm-staging --wasm-dynamic-tiering
// Flags: --wasm-staging --wasm-dynamic-tiering --allow-natives-syntax
d8.file.execute('test/mjsunit/wasm/wasm-module-builder.js');
......@@ -16,6 +16,6 @@ builder.addFunction('load', kSig_i_i).addBody([
]).exportFunc();
const instance = builder.instantiate();
// Call multiple times to trigger dynamic tiering.
for (let i = 0; i < 20; ++i) {
while (%IsLiftoffFunction(instance.exports.load)) {
instance.exports.load(1);
}
......@@ -4,6 +4,8 @@
// Flags: --allow-natives-syntax --wasm-dynamic-tiering --liftoff
// Flags: --no-wasm-tier-up --no-stress-opt
// Make the test faster:
// Flags: --wasm-tiering-budget=1000
// This test busy-waits for tier-up to be complete, hence it does not work in
// predictable mode where we only have a single thread.
......@@ -11,7 +13,6 @@
d8.file.execute('test/mjsunit/wasm/wasm-module-builder.js');
const num_iterations = 4;
const num_functions = 2;
const builder = new WasmModuleBuilder();
......@@ -23,20 +24,16 @@ for (let i = 0; i < num_functions; ++i) {
let instance = builder.instantiate();
for (let i = 0; i < num_iterations - 1; ++i) {
// The first few calls happen with Liftoff code.
for (let i = 0; i < 3; ++i) {
instance.exports.f0();
instance.exports.f1();
}
assertTrue(%IsLiftoffFunction(instance.exports.f0));
assertTrue(%IsLiftoffFunction(instance.exports.f1));
instance.exports.f1();
// Busy waiting until the function is tiered up.
while (true) {
if (!%IsLiftoffFunction(instance.exports.f1)) {
break;
}
// Keep calling the function until it gets tiered up.
while (%IsLiftoffFunction(instance.exports.f1)) {
instance.exports.f1();
}
assertTrue(%IsLiftoffFunction(instance.exports.f0));