Commit b0dc60f6 authored by Predrag Rudic, committed by Commit Bot

MIPS[64]: Port '[Deopt] Remove jump table in prologue of deopt entries.'

Port commit 4ab96a9a

Original message:
> Remove the use of a jump table in the prologue of the deopt entries
> and instead pass the bailout id explicitly in a register when calling
> the deopt entry routine from optimized code. This unifies the logic
> with the way the Arm64 code works. It saves the following amount of
> memory in code stubs:
>
>  - arm:  384KB
>  - ia32: 480KB
>  - x64:  240KB
>
> This could be offset by a slight increase in the size of optimized code
> for loading the immediate, however this impact should be minimal and
> will scale with the maximum number of bailout ids (e.g., the size of
> code will increase by one instruction per bailout id on Arm, therefore
> ~98,000 bailouts will be needed before the overhead is greater than
> the current fixed table size).
>
> Change-Id: I838604b48fa04cbd45320c7b9dac0de08fd8eb25
> Reviewed-on: https://chromium-review.googlesource.com/c/1398224
> Commit-Queue: Ross McIlroy <rmcilroy@chromium.org>
> Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#58636}
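
For reference, the ~98,000 break-even figure quoted above works out as follows: the 384 KB of table removed on Arm is 393,216 bytes, each bailout id now costs one extra 4-byte instruction at its call site, and 393,216 / 4 = 98,304 bailout ids before the per-call-site overhead exceeds the old fixed table size.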

Change-Id: I4d070b90ebd4f9d4e82eaa74fe6d41c3a39d93e8
Reviewed-on: https://chromium-review.googlesource.com/c/1400848
Reviewed-by: Sreten Kovacevic <skovacevic@wavecomp.com>
Commit-Queue: Sreten Kovacevic <skovacevic@wavecomp.com>
Cr-Commit-Position: refs/heads/master@{#58655}
parent ba712bf8
@@ -202,11 +202,6 @@ bool RelocInfo::IsInConstantPool() {
   return false;
 }
 
-int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
-  DCHECK(IsRuntimeEntry(rmode_));
-  return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
-}
-
 uint32_t RelocInfo::wasm_call_tag() const {
   DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
   return static_cast<uint32_t>(
......
@@ -15,10 +15,9 @@ namespace internal {
 // This code tries to be close to ia32 code so that any changes can be
 // easily ported.
 void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
-                                                Isolate* isolate, int count,
+                                                Isolate* isolate,
                                                 DeoptimizeKind deopt_kind) {
   NoRootArrayScope no_root_array(masm);
-  GenerateDeoptimizationEntriesPrologue(masm, count);
 
   // Unlike on ARM we don't save all the registers, just the useful ones.
   // For the rest, there are gaps on the stack, so the offsets remain the same.
@@ -64,16 +63,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
   const int kSavedRegistersAreaSize =
       (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
 
-  // Get the bailout id from the stack.
-  __ lw(a2, MemOperand(sp, kSavedRegistersAreaSize));
+  // The bailout id is passed in kRootRegister by the caller.
+  __ mov(a2, kRootRegister);
 
   // Get the address of the location in the code object (a3) (return
   // address for lazy deoptimization) and compute the fp-to-sp delta in
   // register t0.
   __ mov(a3, ra);
-  // Correct one word for bailout id.
-  __ Addu(t0, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
+  __ Addu(t0, sp, Operand(kSavedRegistersAreaSize));
   __ Subu(t0, fp, t0);
 
   // Allocate a new deoptimizer object.
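
The dropped `+ (1 * kPointerSize)` term follows from the new entry layout: previously the jump-table entry pushed the bailout id before the registers were saved, so the id sat at sp + kSavedRegistersAreaSize and both the fp-to-sp delta and the later stack teardown had to skip one extra word. With the id arriving in kRootRegister, the stack holds only the saved-register area.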
@@ -139,8 +136,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
     __ swc1(f0, MemOperand(a1, dst_offset));
   }
 
-  // Remove the bailout id and the saved registers from the stack.
-  __ Addu(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
+  // Remove the saved registers from the stack.
+  __ Addu(sp, sp, Operand(kSavedRegistersAreaSize));
 
   // Compute a pointer to the unwinding limit in register a2; that is
   // the first stack slot not part of the input frame.
@@ -240,73 +237,6 @@ const int Deoptimizer::table_entry_size_ = 2 * kInstrSize;
 const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
 #endif
 
-void Deoptimizer::GenerateDeoptimizationEntriesPrologue(MacroAssembler* masm,
-                                                        int count) {
-  Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
-
-  // Create a sequence of deoptimization entries.
-  // Note that registers are still live when jumping to an entry.
-  Label table_start, done, trampoline_jump;
-  __ bind(&table_start);
-#ifdef _MIPS_ARCH_MIPS32R6
-  int kMaxEntriesBranchReach =
-      (1 << (kImm26Bits - 2)) / (table_entry_size_ / kInstrSize);
-#else
-  int kMaxEntriesBranchReach =
-      (1 << (kImm16Bits - 2)) / (table_entry_size_ / kInstrSize);
-#endif
-
-  if (count <= kMaxEntriesBranchReach) {
-    // Common case.
-    for (int i = 0; i < count; i++) {
-      Label start;
-      __ bind(&start);
-      DCHECK(is_int16(i));
-      if (IsMipsArchVariant(kMips32r6)) {
-        __ li(kScratchReg, i);
-        __ BranchShort(PROTECT, &done);
-      } else {
-        __ BranchShort(USE_DELAY_SLOT, &done);  // Expose delay slot.
-        __ li(kScratchReg, i);                  // In the delay slot.
-        __ nop();
-      }
-      DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
-    }
-    DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
-              count * table_entry_size_);
-    __ bind(&done);
-    __ Push(kScratchReg);
-  } else {
-    DCHECK(!IsMipsArchVariant(kMips32r6));
-    // Uncommon case, the branch cannot reach.
-    // Create mini trampoline to reach the end of the table
-    for (int i = 0, j = 0; i < count; i++, j++) {
-      Label start;
-      __ bind(&start);
-      DCHECK(is_int16(i));
-      if (j >= kMaxEntriesBranchReach) {
-        j = 0;
-        __ li(kScratchReg, i);
-        __ bind(&trampoline_jump);
-        trampoline_jump = Label();
-        __ BranchShort(USE_DELAY_SLOT, &trampoline_jump);
-        __ nop();
-      } else {
-        __ BranchShort(USE_DELAY_SLOT, &trampoline_jump);  // Expose delay slot.
-        __ li(kScratchReg, i);                             // In the delay slot.
-        __ nop();
-      }
-      DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
-    }
-    DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
-              count * table_entry_size_);
-    __ bind(&trampoline_jump);
-    __ Push(kScratchReg);
-  }
-}
-
 bool Deoptimizer::PadTopOfStackRegister() { return false; }
......
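For intuition, here is a minimal standalone sketch (plain C++, not V8 code; CommonDeoptEntry, MakeJumpTable, and CallForDeoptimization are invented stand-ins) contrasting the removed per-id stub table with the new explicit-id call:

// Toy model of the two dispatch schemes, assuming a deopt entry is just a
// function call that needs to learn which bailout id triggered it.
#include <cstdio>
#include <functional>
#include <vector>

static void CommonDeoptEntry(int bailout_id) {
  std::printf("deopt with bailout id %d\n", bailout_id);
}

// Old scheme: a fixed table of tiny stubs. Stub i records its own index and
// branches to the common entry ("li kScratchReg, i; b done" in the removed
// prologue). The table's size grows with the number of bailout ids.
static std::vector<std::function<void()>> MakeJumpTable(int count) {
  std::vector<std::function<void()>> table;
  for (int i = 0; i < count; i++) {
    table.push_back([i] { CommonDeoptEntry(i); });
  }
  return table;
}

// New scheme: no table. The caller materializes the id in a register
// (kRootRegister in this port) and calls the single entry directly.
static void CallForDeoptimization(int deopt_id) {
  CommonDeoptEntry(deopt_id);  // "li kRootRegister, id; call entry"
}

int main() {
  auto table = MakeJumpTable(3);
  table[2]();                // old: indirect through the per-id stub
  CallForDeoptimization(2);  // new: id passed explicitly by the call site
}

Either way the common entry receives the id; the change just moves the id-loading instruction from a fixed table into each optimized call site, which is what the size trade-off in the original message quantifies.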
@@ -5519,6 +5519,17 @@ void TurboAssembler::ResetSpeculationPoisonRegister() {
   li(kSpeculationPoisonRegister, -1);
 }
 
+void TurboAssembler::CallForDeoptimization(Address target, int deopt_id) {
+  NoRootArrayScope no_root_array(this);
+
+  // Save the deopt id in kRootRegister (we don't need the roots array from
+  // now on).
+  DCHECK_LE(deopt_id, 0xFFFF);
+  li(kRootRegister, deopt_id);
+  Call(target, RelocInfo::RUNTIME_ENTRY);
+}
+
 }  // namespace internal
 }  // namespace v8
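
A note on the design choice: kRootRegister is free to carry the id here because the deopt entry generator above opens a NoRootArrayScope, so the roots array is never consulted between this call and the deoptimizer taking over. The DCHECK_LE(deopt_id, 0xFFFF) bound should also let `li` materialize the id as a single 16-bit immediate load on MIPS, keeping the per-call-site cost at one instruction, in line with the estimate in the original message.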
......
@@ -252,11 +252,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   // The return address on the stack is used by frame iteration.
   void StoreReturnAddressAndCall(Register target);
 
-  void CallForDeoptimization(Address target, int deopt_id,
-                             RelocInfo::Mode rmode) {
-    USE(deopt_id);
-    Call(target, rmode);
-  }
+  void CallForDeoptimization(Address target, int deopt_id);
 
   void Ret(COND_ARGS);
   inline void Ret(BranchDelaySlot bd, Condition cond = al,
......
@@ -179,11 +179,6 @@ bool RelocInfo::IsInConstantPool() {
   return false;
 }
 
-int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
-  DCHECK(IsRuntimeEntry(rmode_));
-  return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
-}
-
 uint32_t RelocInfo::wasm_call_tag() const {
   DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
   return static_cast<uint32_t>(
......
@@ -15,10 +15,9 @@ namespace internal {
 // This code tries to be close to ia32 code so that any changes can be
 // easily ported.
 void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
-                                                Isolate* isolate, int count,
+                                                Isolate* isolate,
                                                 DeoptimizeKind deopt_kind) {
   NoRootArrayScope no_root_array(masm);
-  GenerateDeoptimizationEntriesPrologue(masm, count);
 
   // Unlike on ARM we don't save all the registers, just the useful ones.
   // For the rest, there are gaps on the stack, so the offsets remain the same.
@@ -65,15 +64,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
   const int kSavedRegistersAreaSize =
       (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
 
-  // Get the bailout id from the stack.
-  __ Ld(a2, MemOperand(sp, kSavedRegistersAreaSize));
+  // The bailout id is passed in kRootRegister by the caller.
+  __ mov(a2, kRootRegister);
 
   // Get the address of the location in the code object (a3) (return
   // address for lazy deoptimization) and compute the fp-to-sp delta in
   // register a4.
   __ mov(a3, ra);
-  // Correct one word for bailout id.
-  __ Daddu(a4, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
+  __ Daddu(a4, sp, Operand(kSavedRegistersAreaSize));
   __ Dsubu(a4, fp, a4);
@@ -140,8 +138,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
     __ Swc1(f0, MemOperand(a1, dst_offset));
   }
 
-  // Remove the bailout id and the saved registers from the stack.
-  __ Daddu(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
+  // Remove the saved registers from the stack.
+  __ Daddu(sp, sp, Operand(kSavedRegistersAreaSize));
 
   // Compute a pointer to the unwinding limit in register a2; that is
   // the first stack slot not part of the input frame.
@@ -239,74 +237,6 @@ const int Deoptimizer::table_entry_size_ = 2 * kInstrSize;
 const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
 #endif
 
-void Deoptimizer::GenerateDeoptimizationEntriesPrologue(MacroAssembler* masm,
-                                                        int count) {
-  Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
-
-  // Create a sequence of deoptimization entries.
-  // Note that registers are still live when jumping to an entry.
-  Label table_start, done, trampoline_jump;
-  __ bind(&table_start);
-#ifdef _MIPS_ARCH_MIPS64R6
-  int kMaxEntriesBranchReach =
-      (1 << (kImm26Bits - 2)) / (table_entry_size_ / kInstrSize);
-#else
-  int kMaxEntriesBranchReach =
-      (1 << (kImm16Bits - 2)) / (table_entry_size_ / kInstrSize);
-#endif
-
-  if (count <= kMaxEntriesBranchReach) {
-    // Common case.
-    for (int i = 0; i < count; i++) {
-      Label start;
-      __ bind(&start);
-      DCHECK(is_int16(i));
-      if (kArchVariant == kMips64r6) {
-        __ li(kScratchReg, i);
-        __ BranchShort(PROTECT, &done);
-      } else {
-        __ BranchShort(USE_DELAY_SLOT, &done);  // Expose delay slot.
-        __ li(kScratchReg, i);                  // In the delay slot.
-        __ nop();
-      }
-      DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
-    }
-    DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
-              count * table_entry_size_);
-    __ bind(&done);
-    __ Push(kScratchReg);
-  } else {
-    DCHECK_NE(kArchVariant, kMips64r6);
-    // Uncommon case, the branch cannot reach.
-    // Create mini trampoline to reach the end of the table
-    for (int i = 0, j = 0; i < count; i++, j++) {
-      Label start;
-      __ bind(&start);
-      DCHECK(is_int16(i));
-      if (j >= kMaxEntriesBranchReach) {
-        j = 0;
-        __ li(kScratchReg, i);
-        __ bind(&trampoline_jump);
-        trampoline_jump = Label();
-        __ BranchShort(USE_DELAY_SLOT, &trampoline_jump);
-        __ nop();
-      } else {
-        __ BranchShort(USE_DELAY_SLOT, &trampoline_jump);  // Expose delay slot.
-        __ li(kScratchReg, i);                             // In the delay slot.
-        __ nop();
-      }
-      DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
-    }
-    DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
-              count * table_entry_size_);
-    __ bind(&trampoline_jump);
-    __ Push(kScratchReg);
-  }
-}
-
 bool Deoptimizer::PadTopOfStackRegister() { return false; }
 
 void FrameDescription::SetCallerPc(unsigned offset, intptr_t value) {
......
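A side benefit of deleting the table is visible in the removed fallback path above: pre-r6 MIPS short branches carry a 16-bit offset, so once the table grew past kMaxEntriesBranchReach entries a single branch could no longer reach the common exit, and the generator had to plant a mini-trampoline every kMaxEntriesBranchReach entries that re-branched toward the end. With the bailout id passed in a register, that reach problem disappears entirely.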
@@ -5886,6 +5886,16 @@ void TurboAssembler::ResetSpeculationPoisonRegister() {
   li(kSpeculationPoisonRegister, -1);
 }
 
+void TurboAssembler::CallForDeoptimization(Address target, int deopt_id) {
+  NoRootArrayScope no_root_array(this);
+
+  // Save the deopt id in kRootRegister (we don't need the roots array from
+  // now on).
+  DCHECK_LE(deopt_id, 0xFFFF);
+  li(kRootRegister, deopt_id);
+  Call(target, RelocInfo::RUNTIME_ENTRY);
+}
+
 }  // namespace internal
 }  // namespace v8
......
@@ -277,11 +277,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   // The return address on the stack is used by frame iteration.
   void StoreReturnAddressAndCall(Register target);
 
-  void CallForDeoptimization(Address target, int deopt_id,
-                             RelocInfo::Mode rmode) {
-    USE(deopt_id);
-    Call(target, rmode);
-  }
+  void CallForDeoptimization(Address target, int deopt_id);
 
   void Ret(COND_ARGS);
   inline void Ret(BranchDelaySlot bd, Condition cond = al,
......