Commit 2b24df99 authored by Kim-Anh Tran, committed by Commit Bot

[wasm] Fix Liftoff-prologue for tiering to correctly restore state

When using registers during the Liftoff prologue, we need to make sure
that all reserved registers are correctly pushed to and restored
from the stack.

Change-Id: Iac444448cfd99fca70a811cb941d0cf5979d638b
Reviewed-on: https://chromium-review.googlesource.com/1005754
Commit-Queue: Kim-Anh Tran <kimanh@google.com>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Clemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#52555}
parent 34f86aee
@@ -30,6 +30,8 @@ constexpr RegList kLiftoffAssemblerGpCacheRegs =
 constexpr RegList kLiftoffAssemblerFpCacheRegs =
     DoubleRegister::ListOf<xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6>();
+constexpr Register kNoParamRegister = edi;
 #elif V8_TARGET_ARCH_X64
 constexpr RegList kLiftoffAssemblerGpCacheRegs =
@@ -38,6 +40,8 @@ constexpr RegList kLiftoffAssemblerGpCacheRegs =
 constexpr RegList kLiftoffAssemblerFpCacheRegs =
     DoubleRegister::ListOf<xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7>();
+constexpr Register kNoParamRegister = r8;
 #elif V8_TARGET_ARCH_MIPS
 constexpr RegList kLiftoffAssemblerGpCacheRegs =
@@ -47,6 +51,8 @@ constexpr RegList kLiftoffAssemblerFpCacheRegs =
     DoubleRegister::ListOf<f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20,
                            f22, f24>();
+constexpr Register kNoParamRegister = t0;
 #elif V8_TARGET_ARCH_MIPS64
 constexpr RegList kLiftoffAssemblerGpCacheRegs =
@@ -56,12 +62,18 @@ constexpr RegList kLiftoffAssemblerFpCacheRegs =
     DoubleRegister::ListOf<f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20,
                            f22, f24, f26>();
+constexpr Register kNoParamRegister = t0;
 #else
 constexpr RegList kLiftoffAssemblerGpCacheRegs = 0xff;
 constexpr RegList kLiftoffAssemblerFpCacheRegs = 0xff;
+// This should be an allocatable, general purpose register
+// that is not used for parameters, see {wasm-linkage.cc}.
+constexpr Register kNoParamRegister = Register::no_reg();
 #endif
 #if V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_X64
......
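The cache-register lists above are plain bitmasks indexed by register code; the collection pass added below tests membership with `cache_regs & (1 << reg_code)`. A minimal stand-alone sketch of that representation, with placeholder masks and codes rather than V8's real per-architecture values:

    #include <cstdint>

    // Illustrative only: a RegList is a bitmask of register codes, so
    // register-set membership is a single bit test. The values here are
    // placeholders, not the actual architecture masks.
    using RegList = uint32_t;

    constexpr bool Contains(RegList list, int reg_code) {
      return (list & (RegList{1} << reg_code)) != 0;
    }

    constexpr RegList kGpCacheRegs = 0b1110'1111;  // say, code 4 (esp) excluded
    static_assert(Contains(kGpCacheRegs, 7), "code 7 is a cache register");
    static_assert(!Contains(kGpCacheRegs, 4), "the stack pointer is not");

    int main() {}

The new {kNoParamRegister} constant gives each architecture one register that is guaranteed not to carry a parameter, so the tier-up prologue always has somewhere to hold the code address without clobbering the incoming state.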
@@ -216,6 +216,33 @@ class LiftoffCompiler {
     }
   }
+  void CollectReservedRegsForParameters(uint32_t input_idx_start,
+                                        uint32_t num_params,
+                                        LiftoffRegList& param_regs) {
+    uint32_t input_idx = input_idx_start;
+    for (uint32_t param_idx = 0; param_idx < num_params; ++param_idx) {
+      ValueType type = __ local_type(param_idx);
+      const int num_lowered_params = 1 + needs_reg_pair(type);
+      RegClass rc = num_lowered_params == 1 ? reg_class_for(type) : kGpReg;
+      for (int pair_idx = 0; pair_idx < num_lowered_params; ++pair_idx) {
+        compiler::LinkageLocation param_loc =
+            descriptor_->GetInputLocation(input_idx + pair_idx);
+        if (param_loc.IsRegister()) {
+          DCHECK(!param_loc.IsAnyRegister());
+          int reg_code = param_loc.AsRegister();
+          RegList cache_regs = rc == kGpReg ? kLiftoffAssemblerGpCacheRegs
+                                            : kLiftoffAssemblerFpCacheRegs;
+          if (cache_regs & (1 << reg_code)) {
+            LiftoffRegister in_reg = LiftoffRegister::from_code(rc, reg_code);
+            param_regs.set(in_reg);
+          }
+        }
+      }
+      input_idx += num_lowered_params;
+    }
+  }
   // Returns the number of inputs processed (1 or 2).
   uint32_t ProcessParameter(ValueType type, uint32_t input_idx) {
     const int num_lowered_params = 1 + needs_reg_pair(type);
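The walk above advances {input_idx} by {num_lowered_params} per parameter because a single wasm value can occupy more than one linkage input: when {needs_reg_pair} is true (an i64 on a 32-bit target), the value is lowered to a pair of GP registers. A small stand-alone model of that counting, using stand-in types rather than V8's ValueType and call-descriptor API:

    #include <cassert>
    #include <vector>

    // Hypothetical mirror of the lowering walk: on a 32-bit target an i64
    // parameter is lowered to a register pair and consumes two linkage
    // inputs; everything else consumes one.
    enum class ValueType { kI32, kI64 };

    int CountLinkageInputs(const std::vector<ValueType>& params, bool is_32bit) {
      int input_idx = 0;
      for (ValueType type : params) {
        // Mirrors: num_lowered_params = 1 + needs_reg_pair(type)
        int num_lowered = 1 + ((is_32bit && type == ValueType::kI64) ? 1 : 0);
        input_idx += num_lowered;
      }
      return input_idx;
    }

    int main() {
      std::vector<ValueType> sig = {ValueType::kI32, ValueType::kI64,
                                    ValueType::kI32};
      assert(CountLinkageInputs(sig, /*is_32bit=*/true) == 4);   // i64 -> pair
      assert(CountLinkageInputs(sig, /*is_32bit=*/false) == 3);  // i64 -> one reg
    }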
@@ -275,48 +302,88 @@ class LiftoffCompiler {
   // Inserts a check whether the optimized version of this code already exists.
   // If so, it redirects execution to the optimized code.
-  void JumpToOptimizedCodeIfExisting() {
-    // Check whether we have an optimized function before
-    // continuing to execute the Liftoff-compiled code.
-    // TODO(clemensh): Reduce number of temporary registers.
-    LiftoffRegList pinned;
-    LiftoffRegister wasm_code_addr =
-        pinned.set(__ GetUnusedRegister(kGpReg, pinned));
-    LiftoffRegister target_code_addr =
-        pinned.set(__ GetUnusedRegister(kGpReg, pinned));
-    LiftoffRegister code_start_address =
-        pinned.set(__ GetUnusedRegister(kGpReg, pinned));
-    // Get the current code's target address ({instructions_.start()}).
-    __ ComputeCodeStartAddress(code_start_address.gp());
+  void JumpToOptimizedCodeIfExisting(LiftoffRegList param_regs) {
+    // We need one register to keep the address of the optimized
+    // code that is not used to keep parameters.
+    LiftoffRegister address_tmp = LiftoffRegister(kNoParamRegister);
+    DCHECK(!param_regs.has(address_tmp));
+    LiftoffRegList available_regs = kGpCacheRegList & ~param_regs;
+    // We already use the {address_tmp} later, so remove it too.
+    available_regs.clear(address_tmp);
+    // We require one general purpose register.
+    if (available_regs.is_empty()) {
+      LiftoffRegList taken_gp_regs = kGpCacheRegList & param_regs;
+      LiftoffRegister reg = taken_gp_regs.GetFirstRegSet();
+      available_regs.set(reg);
+    }
+    LiftoffRegister tmp = available_regs.GetFirstRegSet();
+    if (param_regs.has(tmp)) __ PushRegisters(LiftoffRegList::ForRegs(tmp));
     static LoadType kPointerLoadType =
         LoadType::ForValueType(LiftoffAssembler::kWasmIntPtr);
     using int_t = std::conditional<kPointerSize == 8, uint64_t, uint32_t>::type;
     static_assert(sizeof(int_t) == sizeof(uintptr_t), "weird uintptr_t");
     // Get the address of the WasmCode* currently stored in the code table.
-    __ LoadConstant(target_code_addr,
+    __ LoadConstant(address_tmp,
                     WasmValue(reinterpret_cast<int_t>(code_table_entry_)),
                     RelocInfo::WASM_CODE_TABLE_ENTRY);
     // Load the corresponding WasmCode*.
-    __ Load(wasm_code_addr, target_code_addr.gp(), Register::no_reg(), 0,
-            kPointerLoadType, pinned);
+    LiftoffRegister wasm_code_address = tmp;
+    __ Load(wasm_code_address, address_tmp.gp(), Register::no_reg(), 0,
+            kPointerLoadType, param_regs);
     // Load its target address ({instructions_.start()}).
-    __ Load(target_code_addr, wasm_code_addr.gp(), Register::no_reg(),
-            WasmCode::kInstructionStartOffset, kPointerLoadType, pinned);
+    __ Load(address_tmp, wasm_code_address.gp(), Register::no_reg(),
+            WasmCode::kInstructionStartOffset, kPointerLoadType, param_regs);
+    // Get the current code's target address ({instructions_.start()}).
+    LiftoffRegister code_start_address = tmp;
+    __ ComputeCodeStartAddress(code_start_address.gp());
     // If the current code's target address is the same as the
     // target address of the stored WasmCode, then continue executing, otherwise
     // jump to the updated WasmCode.
     Label cont;
     __ emit_cond_jump(kEqual, &cont, LiftoffAssembler::kWasmIntPtr,
-                      target_code_addr.gp(), code_start_address.gp());
-    __ LeaveFrame(StackFrame::WASM_COMPILED);
-    __ emit_jump(target_code_addr.gp());
+                      address_tmp.gp(), code_start_address.gp());
+    if (param_regs.has(tmp)) __ PopRegisters(LiftoffRegList::ForRegs(tmp));
+    __ emit_jump(address_tmp.gp());
     __ bind(&cont);
+    if (param_regs.has(tmp)) __ PopRegisters(LiftoffRegList::ForRegs(tmp));
   }
   void StartFunctionBody(Decoder* decoder, Control* block) {
+    // Input 0 is the call target, the instance is at 1.
+    constexpr int kInstanceParameterIndex = 1;
+    // Store the instance parameter to a special stack slot.
+    compiler::LinkageLocation instance_loc =
+        descriptor_->GetInputLocation(kInstanceParameterIndex);
+    DCHECK(instance_loc.IsRegister());
+    DCHECK(!instance_loc.IsAnyRegister());
+    Register instance_reg = Register::from_code(instance_loc.AsRegister());
+    // Parameter 0 is the instance parameter.
+    uint32_t num_params =
+        static_cast<uint32_t>(decoder->sig_->parameter_count());
+    if (FLAG_wasm_tier_up) {
+      if (!kNoParamRegister.is_valid()) {
+        unsupported(decoder, "Please define kNoParamRegister.");
+        return;
+      }
+      // Collect all registers that are allocated on function entry.
+      LiftoffRegList param_regs;
+      param_regs.set(instance_reg);
+      CollectReservedRegsForParameters(kInstanceParameterIndex + 1, num_params,
+                                       param_regs);
+      JumpToOptimizedCodeIfExisting(param_regs);
+    }
     __ EnterFrame(StackFrame::WASM_COMPILED);
     __ set_has_frame(true);
     pc_offset_stack_frame_construction_ = __ PrepareStackFrame();
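The scratch selection in {JumpToOptimizedCodeIfExisting} is the core of the fix: the temporary {tmp} can only collide with a parameter register when parameters occupy every other usable cache register, and in that case it is pushed before the check and popped on both exits, the tail-jump path and the fall-through path. A self-contained model of the selection logic, with placeholder masks instead of the real LiftoffRegList API:

    #include <cassert>
    #include <cstdint>

    // Stand-alone model of the scratch selection (placeholder masks and a
    // hypothetical PickScratch(); the real code uses LiftoffRegList).
    using RegList = uint32_t;

    constexpr RegList kGpCacheRegs = 0xff;  // assume cache registers 0..7
    constexpr int kNoParamReg = 7;          // reserved for the code address

    int FirstSet(RegList list) {  // precondition: list != 0
      int code = 0;
      while (!(list & (RegList{1} << code))) ++code;
      return code;
    }

    int PickScratch(RegList param_regs) {
      // Prefer a register that holds no parameter...
      RegList available = kGpCacheRegs & ~param_regs;
      // ...and is not the register reserved for the code address.
      available &= ~(RegList{1} << kNoParamReg);
      // If none is left, a parameter register must be saved and restored.
      if (available == 0) available = kGpCacheRegs & param_regs;
      return FirstSet(available);
    }

    int main() {
      // Params in registers 0..3: a free scratch (register 4) exists.
      assert(PickScratch(0b0000'1111) == 4);
      // Params in 0..6: only the reserved register 7 would remain, so the
      // scratch collides with a parameter and needs a push/pop around the check.
      RegList params = 0b0111'1111;
      int tmp = PickScratch(params);
      assert((params & (RegList{1} << tmp)) != 0);  // must be saved/restored
    }

Note also that the check now runs before EnterFrame, which is why the old LeaveFrame call on the jump path disappears: when the jump is taken, this function's frame has not been built yet, and the target code sets up its own.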
@@ -327,21 +394,11 @@ class LiftoffCompiler {
     // finish compilation without errors even if we hit unimplemented
     // LiftoffAssembler methods.
     if (DidAssemblerBailout(decoder)) return;
-    // Parameter 0 is the instance parameter.
-    uint32_t num_params =
-        static_cast<uint32_t>(decoder->sig_->parameter_count());
     for (uint32_t i = 0; i < __ num_locals(); ++i) {
       if (!CheckSupportedType(decoder, kTypes_ilfd, __ local_type(i), "param"))
         return;
     }
-    // Input 0 is the call target, the instance is at 1.
-    constexpr int kInstanceParameterIndex = 1;
-    // Store the instance parameter to a special stack slot.
-    compiler::LinkageLocation instance_loc =
-        descriptor_->GetInputLocation(kInstanceParameterIndex);
-    DCHECK(instance_loc.IsRegister());
-    DCHECK(!instance_loc.IsAnyRegister());
-    Register instance_reg = Register::from_code(instance_loc.AsRegister());
     __ SpillInstance(instance_reg);
     // Input 0 is the code target, 1 is the instance. First parameter at 2.
     uint32_t input_idx = kInstanceParameterIndex + 1;
@@ -384,14 +441,6 @@ class LiftoffCompiler {
     StackCheck(0);
     DCHECK_EQ(__ num_locals(), __ cache_state()->stack_height());
-    // TODO(kimanh): if possible, we want to move this check further up,
-    // in order to avoid unnecessary overhead each time we enter
-    // a Liftoff-compiled function that will jump to a Turbofan-compiled
-    // function.
-    if (FLAG_wasm_tier_up) {
-      JumpToOptimizedCodeIfExisting();
-    }
   }
   void GenerateOutOfLineCode(OutOfLineCode& ool) {
......
@@ -3293,6 +3293,69 @@ WASM_EXEC_TEST(I64ShrUOnDifferentRegisters) {
   });
 }
+TEST(Liftoff_prologue) {
+  // The tested prologue is only inserted in tiering mode. The prologue
+  // is responsible for jumping to the optimized, tiered-up code if
+  // it exists.
+  FlagScope<bool> tier_up_scope(&v8::internal::FLAG_wasm_tier_up, true);
+
+  // The number of parameters determines how many registers are used
+  // on a function call. The Liftoff prologue has to save all parameter
+  // registers before the check and restore them afterwards.
+  const uint8_t kNumParams = 4;
+  ValueType int_types[kNumParams + 1];
+  for (int i = 0; i < kNumParams + 1; i++) int_types[i] = kWasmI32;
+  FunctionSig sig_i_x(1, kNumParams, int_types);
+
+  WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(
+      WasmExecutionMode::kExecuteLiftoff);
+
+  // Define two functions: {add_locals} and {sub_locals}, where
+  // {sub_locals} serves as our mock optimized code.
+  std::vector<byte> add_locals, sub_locals;
+  ADD_CODE(add_locals, WASM_I32_ADD(WASM_GET_LOCAL(0), WASM_GET_LOCAL(1)));
+  ADD_CODE(sub_locals, WASM_I32_SUB(WASM_GET_LOCAL(0), WASM_GET_LOCAL(1)));
+  for (int i = 2; i < kNumParams; ++i) {
+    ADD_CODE(add_locals, WASM_GET_LOCAL(i), kExprI32Add);
+    ADD_CODE(sub_locals, WASM_GET_LOCAL(i), kExprI32Sub);
+  }
+  WasmFunctionCompiler& add_compiler = r.NewFunction(&sig_i_x);
+  add_compiler.Build(&add_locals[0], &add_locals[0] + add_locals.size());
+  WasmFunctionCompiler& sub_compiler = r.NewFunction(&sig_i_x);
+  sub_compiler.Build(&sub_locals[0], &sub_locals[0] + sub_locals.size());
+
+  // Create a calling function, which shall call {add_locals}.
+  std::vector<byte> call;
+  for (int i = 0; i < kNumParams; ++i) {
+    ADD_CODE(call, WASM_GET_LOCAL(i));
+  }
+  ADD_CODE(call, kExprCallFunction,
+           static_cast<byte>(add_compiler.function_index()));
+  r.Build(&call[0], &call[0] + call.size());
+
+  NativeModule* native_module =
+      r.builder().instance_object()->compiled_module()->GetNativeModule();
+
+  // This test only works if we managed to compile with Liftoff.
+  if (native_module->GetCode(add_compiler.function_index())->is_liftoff()) {
+    // First run should execute {add_locals}.
+    CHECK_EQ(10, r.Call(1, 2, 3, 4));
+
+    // Update the native_module to contain the "optimized" code
+    // ({sub_locals}).
+    native_module->SetCode(
+        add_compiler.function_index(),
+        native_module->GetCode(sub_compiler.function_index()));
+
+    // Second run should execute {add_locals}, which should detect that
+    // the code was updated, and run {sub_locals} instead.
+    CHECK_EQ(-8, r.Call(1, 2, 3, 4));
+  }
+}
 #undef B1
 #undef B2
 #undef RET
......
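A quick sanity check of the expected values: the body of {add_locals} folds left over its parameters, so r.Call(1, 2, 3, 4) computes ((1 + 2) + 3) + 4 = 10, while {sub_locals} computes ((1 - 2) - 3) - 4 = -8 once the code-table entry has been swapped.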