Commit 7a93bd64 authored by Jakob Kummerow, committed by V8 LUCI CQ

[wasm] Execution budget based dynamic tiering

Temporarily behind a new flag: --new-wasm-dynamic-tiering
The plan is to merge this into the existing --wasm-dynamic-tiering
flag once it's been confirmed to be generally beneficial.

Bug: v8:12281
Change-Id: I191d03170f8d5360073a45fea170f432074f7534
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3247632
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77620}
parent 39d58fd9
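
In a nutshell: every Liftoff-compiled function starts with an execution budget (--wasm-tiering-budget, default 1800000, a rough approximation of bytes executed); the budget is decremented as the function runs, and when it goes negative the function is considered hot and a top-tier (TurboFan) compilation unit is scheduled. A minimal standalone sketch of that policy (illustrative only; the names and per-call cost below are invented, not code from this CL):

  #include <cstdio>

  // Mirrors the --wasm-tiering-budget default introduced below.
  constexpr int kTieringBudget = 1800000;

  struct FunctionState {
    int budget = kTieringBudget;  // per-function execution budget
    bool top_tier = false;        // has top-tier code been requested?
  };

  // Stand-in for the check Liftoff emits: charge an approximate
  // "bytes executed" cost and detect when the budget goes negative.
  void ChargeBudget(FunctionState& fn, int cost) {
    if (fn.top_tier) return;
    fn.budget -= cost;
    if (fn.budget < 0) {
      fn.top_tier = true;  // stand-in for Runtime_WasmTriggerTierUp
      std::puts("budget exhausted -> trigger tier-up");
    }
  }

  int main() {
    FunctionState fn;
    int calls = 0;
    while (!fn.top_tier) {
      ChargeBudget(fn, 120);  // hypothetical cost of one invocation
      ++calls;
    }
    std::printf("tiered up after %d calls\n", calls);  // 15001
  }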
@@ -994,6 +994,12 @@ DEFINE_BOOL(wasm_tier_up, true,
             "have an effect)")
 DEFINE_BOOL(wasm_dynamic_tiering, false,
             "enable dynamic tier up to the optimizing compiler")
+DEFINE_BOOL(new_wasm_dynamic_tiering, false, "dynamic tier up (new impl)")
+// For dynamic tiering to have an effect, we have to turn off eager tierup.
+// This is handled in module-compiler.cc for --wasm-dynamic-tiering.
+DEFINE_NEG_IMPLICATION(new_wasm_dynamic_tiering, wasm_tier_up)
+DEFINE_INT(wasm_tiering_budget, 1800000,
+           "budget for dynamic tiering (rough approximation of bytes executed)")
 DEFINE_INT(
     wasm_caching_threshold, 1000000,
     "the amount of wasm top tier code that triggers the next caching event")
@@ -1100,7 +1106,6 @@ DEFINE_BOOL(trace_wasm_speculative_inlining, false,
             "trace wasm speculative inlining")
 DEFINE_IMPLICATION(wasm_speculative_inlining, experimental_wasm_typed_funcref)
 DEFINE_IMPLICATION(wasm_speculative_inlining, wasm_inlining)
-DEFINE_IMPLICATION(wasm_speculative_inlining, wasm_dynamic_tiering)
 DEFINE_NEG_IMPLICATION(wasm_speculative_inlining, wasm_tier_up)
 DEFINE_BOOL(wasm_loop_unrolling, true,
             "enable loop unrolling for wasm functions")
......
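
For local experimentation, the flags above would combine along these lines (the d8 build path and script name are placeholders, not from this CL):

  out/x64.release/d8 --new-wasm-dynamic-tiering --wasm-tiering-budget=500000 test.js

Note the DEFINE_NEG_IMPLICATION: enabling --new-wasm-dynamic-tiering implicitly disables --wasm-tier-up, since eagerly tiering everything up would leave nothing for budget-based tiering to decide.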
@@ -292,6 +292,16 @@ RUNTIME_FUNCTION(Runtime_WasmTriggerTierUp) {
   DCHECK_EQ(1, args.length());
   CONVERT_ARG_HANDLE_CHECKED(WasmInstanceObject, instance, 0);
 
+  if (FLAG_new_wasm_dynamic_tiering) {
+    // We're reusing this interrupt mechanism to interrupt long-running loops.
+    StackLimitCheck check(isolate);
+    DCHECK(!check.JsHasOverflowed());
+    if (check.InterruptRequested()) {
+      Object result = isolate->stack_guard()->HandleInterrupts();
+      if (result.IsException()) return result;
+    }
+  }
+
   FrameFinder<WasmFrame> frame_finder(isolate);
   int func_index = frame_finder.frame()->function_index();
   auto* native_module = instance->module_object().native_module();
......
@@ -56,13 +56,14 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
 //   0   | previous frame (fp)|
 //  -----+--------------------+  <-- frame ptr (fp)
 //  -1   | 0xa: WASM          |
 //  -2   |     instance       |
 //  -3   |  feedback vector   |
+//  -4   |  tiering budget    |
 //  -----+--------------------+---------------------------
-//  -4   |    slot 0 (high)   |   ^
-//  -5   |    slot 0 (low)    |   |
-//  -6   |    slot 1 (high)   | Frame slots
-//  -7   |    slot 1 (low)    |   |
+//  -5   |    slot 0 (high)   |   ^
+//  -6   |    slot 0 (low)    |   |
+//  -7   |    slot 1 (high)   | Frame slots
+//  -8   |    slot 1 (low)    |   |
 //       |                    |   v
 //  -----+--------------------+  <-- stack ptr (sp)
 //
@@ -70,6 +71,7 @@ static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize,
               "Slot size should be twice the size of the 32 bit pointer.");
 constexpr int kInstanceOffset = 2 * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
+constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
 // kPatchInstructionsRequired sets a maximum limit of how many instructions that
 // PatchPrepareStackFrame will use in order to increase the stack appropriately.
 // Three instructions are required to sub a large constant, movw + movt + sub.
@@ -559,7 +561,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kFeedbackVectorOffset;
+  return liftoff::kTierupBudgetOffset;
 }
 
 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -2239,6 +2241,13 @@ void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
   b(label, cond);
 }
 
+void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
+                                                   int subtrahend,
+                                                   Label* result_negative) {
+  sub(value, value, Operand(subtrahend), SetCC);
+  b(result_negative, mi);
+}
+
 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
   clz(dst, src);
   mov(dst, Operand(dst, LSR, kRegSizeInBitsLog2));
......
@@ -58,9 +58,10 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
 //  -1   | 0xa: WASM      |
 //  -2   |    instance    |
 //  -3   | feedback vector|
+//  -4   | tiering budget |
 //  -----+--------------------+---------------------------
-//  -4   |     slot 0     |   ^
-//  -5   |     slot 1     |   |
+//  -5   |     slot 0     |   ^
+//  -6   |     slot 1     |   |
 //       |                | Frame slots
 //       |                |   |
 //       |                |   v
@@ -70,6 +71,7 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
 constexpr int kInstanceOffset = 2 * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
+constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
 
 inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@@ -386,7 +388,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kFeedbackVectorOffset;
+  return liftoff::kTierupBudgetOffset;
 }
 
 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -1590,6 +1592,13 @@ void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
   B(label, cond);
 }
 
+void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
+                                                   int subtrahend,
+                                                   Label* result_negative) {
+  Subs(value.W(), value.W(), Immediate(subtrahend));
+  B(result_negative, mi);
+}
+
 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
   Cmp(src.W(), wzr);
   Cset(dst.W(), eq);
......
@@ -52,6 +52,7 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
 // ebp-4 holds the stack marker, ebp-8 is the instance parameter.
 constexpr int kInstanceOffset = 8;
 constexpr int kFeedbackVectorOffset = 12;  // ebp-12 is the feedback vector.
+constexpr int kTierupBudgetOffset = 16;  // ebp-16 is the tiering budget.
 
 inline Operand GetStackSlot(int offset) { return Operand(ebp, -offset); }
@@ -308,7 +309,7 @@ void LiftoffAssembler::AbortCompilation() {}
 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kFeedbackVectorOffset;
+  return liftoff::kTierupBudgetOffset;
 }
 
 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -2488,6 +2489,13 @@ void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
   j(cond, label);
 }
 
+void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
+                                                   int subtrahend,
+                                                   Label* result_negative) {
+  sub(value, Immediate(subtrahend));
+  j(negative, result_negative);
+}
+
 namespace liftoff {
 // Setcc into dst register, given a scratch byte register (might be the same as
......
@@ -1012,6 +1012,8 @@ class LiftoffAssembler : public TurboAssembler {
                                   Register lhs, Register rhs = no_reg);
   inline void emit_i32_cond_jumpi(LiftoffCondition, Label*, Register lhs,
                                   int imm);
+  inline void emit_i32_subi_jump_negative(Register value, int subtrahend,
+                                          Label* result_negative);
   // Set {dst} to 1 if condition holds, 0 otherwise.
   inline void emit_i32_eqz(Register dst, Register src);
   inline void emit_i32_set_cond(LiftoffCondition, Register dst, Register lhs,
......
This diff is collapsed.
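Judging from the primitives added elsewhere in this CL (the kTierupBudgetOffset frame slot, the emit_i32_subi_jump_negative() emitter, and the interrupt handling in Runtime_WasmTriggerTierUp), the collapsed diff is presumably the Liftoff compiler wiring them together: load the budget from its frame slot, subtract a cost, store it back, and branch to an out-of-line Runtime_WasmTriggerTierUp call when the result goes negative. That is an inference from the visible hunks, not a summary of the hidden diff.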
@@ -350,12 +350,14 @@ class LiftoffRegList {
   constexpr LiftoffRegList() = default;
 
-  Register set(Register reg) { return set(LiftoffRegister(reg)).gp(); }
-  DoubleRegister set(DoubleRegister reg) {
+  constexpr Register set(Register reg) {
+    return set(LiftoffRegister(reg)).gp();
+  }
+  constexpr DoubleRegister set(DoubleRegister reg) {
     return set(LiftoffRegister(reg)).fp();
   }
-  LiftoffRegister set(LiftoffRegister reg) {
+  constexpr LiftoffRegister set(LiftoffRegister reg) {
     if (reg.is_pair()) {
       regs_ |= storage_t{1} << reg.low().liftoff_code();
       regs_ |= storage_t{1} << reg.high().liftoff_code();
......
@@ -66,6 +66,7 @@ static_assert((kLiftoffAssemblerFpCacheRegs &
 // rbp-8 holds the stack marker, rbp-16 is the instance parameter.
 constexpr int kInstanceOffset = 16;
 constexpr int kFeedbackVectorOffset = 24;  // rbp-24 is the feedback vector.
+constexpr int kTierupBudgetOffset = 32;  // rbp-32 is the tiering budget.
 
 inline Operand GetStackSlot(int offset) { return Operand(rbp, -offset); }
@@ -2161,6 +2162,13 @@ void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
   j(cond, label);
 }
 
+void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
+                                                   int subtrahend,
+                                                   Label* result_negative) {
+  subl(value, Immediate(subtrahend));
+  j(negative, result_negative);
+}
+
 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
   testl(src, src);
   setcc(equal, dst);
......
@@ -1326,6 +1326,23 @@ void TriggerTierUp(Isolate* isolate, NativeModule* native_module,
                                 kNoDebugging};
   const WasmModule* module = native_module->module();
+  size_t priority;
+  if (FLAG_new_wasm_dynamic_tiering) {
+    base::MutexGuard mutex_guard(&module->type_feedback.mutex);
+    int saved_priority =
+        module->type_feedback.feedback_for_function[func_index].tierup_priority;
+    saved_priority++;
+    module->type_feedback.feedback_for_function[func_index].tierup_priority =
+        saved_priority;
+    // Continue to create a compilation unit if this is the first time we
+    // detect this function as hot, and create a new higher-priority unit
+    // if the number of tierup checks is a power of two (at least 4).
+    if (saved_priority > 1 &&
+        (saved_priority < 4 || (saved_priority & (saved_priority - 1)) != 0)) {
+      return;
+    }
+    priority = saved_priority;
+  }
 
   if (FLAG_wasm_speculative_inlining) {
     auto feedback = ProcessTypeFeedback(isolate, instance, func_index);
     base::MutexGuard mutex_guard(&module->type_feedback.mutex);
@@ -1336,11 +1353,11 @@ void TriggerTierUp(Isolate* isolate, NativeModule* native_module,
         std::move(feedback);
   }
 
-  uint32_t* call_array = native_module->num_liftoff_function_calls_array();
-  int offset = wasm::declared_function_index(module, func_index);
-
-  size_t priority =
-      base::Relaxed_Load(reinterpret_cast<int*>(&call_array[offset]));
+  if (!FLAG_new_wasm_dynamic_tiering) {
+    uint32_t* call_array = native_module->num_liftoff_function_calls_array();
+    int offset = wasm::declared_function_index(module, func_index);
+    priority = base::Relaxed_Load(reinterpret_cast<int*>(&call_array[offset]));
+  }
 
   compilation_state->AddTopTierPriorityCompilationUnit(tiering_unit, priority);
 }
......
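
As a sanity check on the gating above: a compilation unit is created on the first tier-up check and again whenever the running check count is a power of two that is at least 4. A standalone snippet verifying the predicate (verification only, not V8 code):

  #include <cstdio>

  // Mirrors (negated) the early-return condition in TriggerTierUp above.
  bool CreatesUnit(int saved_priority) {
    return !(saved_priority > 1 &&
             (saved_priority < 4 ||
              (saved_priority & (saved_priority - 1)) != 0));
  }

  int main() {
    for (int i = 1; i <= 32; ++i) {
      if (CreatesUnit(i)) std::printf("tier-up check #%d -> new unit\n", i);
    }
    // Prints checks 1, 4, 8, 16, 32: the first hotness detection, then
    // powers of two >= 4 at escalating priority.
  }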
@@ -1000,10 +1000,15 @@ NativeModule::NativeModule(const WasmFeatures& enabled,
     num_liftoff_function_calls_ =
         std::make_unique<uint32_t[]>(module_->num_declared_functions);
 
-    // Start counter at 4 to avoid runtime calls for smaller numbers.
-    constexpr int kCounterStart = 4;
-    std::fill_n(num_liftoff_function_calls_.get(),
-                module_->num_declared_functions, kCounterStart);
+    if (FLAG_new_wasm_dynamic_tiering) {
+      std::fill_n(num_liftoff_function_calls_.get(),
+                  module_->num_declared_functions, FLAG_wasm_tiering_budget);
+    } else {
+      // Start counter at 4 to avoid runtime calls for smaller numbers.
+      constexpr int kCounterStart = 4;
+      std::fill_n(num_liftoff_function_calls_.get(),
+                  module_->num_declared_functions, kCounterStart);
+    }
   }
 
   // Even though there cannot be another thread using this object (since we are
   // just constructing it), we need to hold the mutex to fulfill the
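
Back-of-the-envelope for the initialization above: since the budget is charged as a rough approximation of bytes executed (per the --wasm-tiering-budget description), a function that executes on the order of 600 bytes of code per call would exhaust the default budget of 1800000 after roughly 1800000 / 600 = 3000 calls; hotter or larger functions tier up proportionally sooner.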
......
@@ -161,7 +161,7 @@ constexpr int kAnonymousFuncIndex = -1;
 constexpr uint32_t kGenericWrapperBudget = 1000;
 
 #if V8_TARGET_ARCH_X64
-constexpr int32_t kOSRTargetOffset = 4 * kSystemPointerSize;
+constexpr int32_t kOSRTargetOffset = 5 * kSystemPointerSize;
 #endif
 
 }  // namespace wasm
......
@@ -269,6 +269,7 @@ struct CallSiteFeedback {
 struct FunctionTypeFeedback {
   std::vector<CallSiteFeedback> feedback_vector;
   std::map<WasmCodePosition, int> positions;
+  int tierup_priority = 0;
 };
 
 struct TypeFeedbackStorage {
   std::map<uint32_t, FunctionTypeFeedback> feedback_for_function;
......