Commit 3eb4de34 authored by sreten.kovacevic, committed by Commit Bot

MIPS[64]: Port `[arm] Introduce UseScratchRegisterScope`

Add UseScratchRegisterScope for MIPS and use it instead of using the `at` register directly.

Original commit message:
`Introduce a stripped down version of UseScratchRegisterScope for ARM and use it
inside the assembler and macro-assembler. With the exception of the Call
instructions, we now use this scope instead of using the ip register
directly. This is inspired by how the ARM64 backend works.

In general, the benefit of doing this is we can catch cases where ip is being
used both by the caller and by the assembler. But more specifically, TurboFan
reserves r9 as an extra scratch register because ip can already be used by the
assembler. With this utility, we can isolate the cases in the code generator
which need an extra register and potentially fix them, allowing us to give r9
back to the register allocator.

This patch uncovered places in the assembler where we were using ip
unconditionally when we could have re-used the destination register instead.`
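
On MIPS the same idiom runs through the macro-assembler hunks below: open a scope, acquire the scratch register (`at` on MIPS) for the duration of the helper, and let the destructor hand it back. A minimal sketch of the pattern, modelled on the `PushRoot` change in this CL; the helper name `PushTwoRoots` is hypothetical and only illustrates the idiom:

void MacroAssembler::PushTwoRoots(Heap::RootListIndex a,
                                  Heap::RootListIndex b) {
  // Illustration only; mirrors the PushRoot/JumpIfRoot changes below.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();  // takes 'at' out of the scratch list
  LoadRoot(scratch, a);
  Push(scratch);
  LoadRoot(scratch, b);  // the acquired register can be reused freely
  Push(scratch);
}  // ~UseScratchRegisterScope() restores the list, so 'at' is available again

Code that may itself run under such a scope, like the trampoline pool emitter, checks `hasAvailable()` first and falls back to `t8`, as the assembler hunks below show.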

Bug: 
Change-Id: I1a35c1661579882801605337abfc95f75b47f052
Reviewed-on: https://chromium-review.googlesource.com/574923
Commit-Queue: Ivica Bogosavljevic <ivica.bogosavljevic@imgtec.com>
Reviewed-by: Ivica Bogosavljevic <ivica.bogosavljevic@imgtec.com>
Cr-Commit-Position: refs/heads/master@{#46963}
parent ad09fd60
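
The scope added to both assembler headers is a small RAII helper over the assembler's scratch-register list. Its observable behaviour, paraphrased from the implementation in this CL (illustrative snippet, not part of the diff; `assembler` stands for a pointer to the current Assembler):

UseScratchRegisterScope temps(assembler);  // saves *assembler->GetScratchRegisterList()
Register scratch = temps.Acquire();        // pops the lowest set bit: 'at' on MIPS
bool more = temps.hasAvailable();          // false here, the list only ever holds 'at'
// ... emit code using 'scratch' ...
// On destruction the saved list is written back, releasing 'at' again.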
@@ -315,7 +315,8 @@ const Instr kLwSwInstrArgumentMask = ~kLwSwInstrTypeMask;
const Instr kLwSwOffsetMask = kImm16Mask;
Assembler::Assembler(IsolateData isolate_data, void* buffer, int buffer_size)
: AssemblerBase(isolate_data, buffer, buffer_size) {
: AssemblerBase(isolate_data, buffer, buffer_size),
scratch_register_list_(at.bit()) {
reloc_info_writer.Reposition(buffer_ + buffer_size_, pc_);
last_trampoline_pool_end_ = 0;
@@ -1983,9 +1984,10 @@ void Assembler::AdjustBaseAndOffset(MemOperand& src,
// value) fits into int16_t.
return;
}
DCHECK(!src.rm().is(
at)); // Must not overwrite the register 'base' while loading 'offset'.
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
DCHECK(!src.rm().is(scratch)); // Must not overwrite the register 'base'
// while loading 'offset'.
#ifdef DEBUG
// Remember the "(mis)alignment" of 'offset', it will be checked at the end.
@@ -2023,12 +2025,12 @@ void Assembler::AdjustBaseAndOffset(MemOperand& src,
offset_high += (offset_low < 0)
? 1
: 0; // Account for offset sign extension in load/store.
aui(at, src.rm(), static_cast<uint16_t>(offset_high));
aui(scratch, src.rm(), static_cast<uint16_t>(offset_high));
if (two_accesses && !is_int16(static_cast<int32_t>(
offset_low + second_access_add_to_offset))) {
// Avoid overflow in the 16-bit offset of the load/store instruction when
// adding 4.
addiu(at, at, kDoubleSize);
addiu(scratch, scratch, kDoubleSize);
offset_low -= kDoubleSize;
}
src.offset_ = offset_low;
@@ -2043,25 +2045,25 @@ void Assembler::AdjustBaseAndOffset(MemOperand& src,
constexpr int32_t kMaxOffsetForMediumAdjustment =
3 * kMinOffsetForSimpleAdjustment;
if (0 <= src.offset() && src.offset() <= kMaxOffsetForMediumAdjustment) {
addiu(at, src.rm(), kMinOffsetForMediumAdjustment / 2);
addiu(at, at, kMinOffsetForMediumAdjustment / 2);
addiu(scratch, src.rm(), kMinOffsetForMediumAdjustment / 2);
addiu(scratch, scratch, kMinOffsetForMediumAdjustment / 2);
src.offset_ -= kMinOffsetForMediumAdjustment;
} else if (-kMaxOffsetForMediumAdjustment <= src.offset() &&
src.offset() < 0) {
addiu(at, src.rm(), -kMinOffsetForMediumAdjustment / 2);
addiu(at, at, -kMinOffsetForMediumAdjustment / 2);
addiu(scratch, src.rm(), -kMinOffsetForMediumAdjustment / 2);
addiu(scratch, scratch, -kMinOffsetForMediumAdjustment / 2);
src.offset_ += kMinOffsetForMediumAdjustment;
} else {
// Now that all shorter options have been exhausted, load the full 32-bit
// offset.
int32_t loaded_offset = RoundDown(src.offset(), kDoubleSize);
lui(at, (loaded_offset >> kLuiShift) & kImm16Mask);
ori(at, at, loaded_offset & kImm16Mask); // Load 32-bit offset.
addu(at, at, src.rm());
lui(scratch, (loaded_offset >> kLuiShift) & kImm16Mask);
ori(scratch, scratch, loaded_offset & kImm16Mask); // Load 32-bit offset.
addu(scratch, scratch, src.rm());
src.offset_ -= loaded_offset;
}
}
src.rm_ = at;
src.rm_ = scratch;
DCHECK(is_int16(src.offset()));
if (two_accesses) {
@@ -3116,9 +3118,11 @@ MSA_BRANCH_LIST(MSA_BRANCH)
if (is_int10(source.offset())) { \
GenInstrMsaMI10(opcode, source.offset(), source.rm(), wd); \
} else { \
DCHECK(!rs.rm().is(at)); \
addiu(at, source.rm(), source.offset()); \
GenInstrMsaMI10(opcode, 0, at, wd); \
UseScratchRegisterScope temps(this); \
Register scratch = temps.Acquire(); \
DCHECK(!rs.rm().is(scratch)); \
addiu(scratch, source.rm(), source.offset()); \
GenInstrMsaMI10(opcode, 0, scratch, wd); \
} \
}
@@ -3788,8 +3792,10 @@ void Assembler::CheckTrampolinePool() {
// references until associated instructions are emitted and
// available to be patched.
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED);
lui(at, lui_offset);
jic(at, jic_offset);
UseScratchRegisterScope temps(this);
Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
lui(scratch, lui_offset);
jic(scratch, jic_offset);
}
CheckBuffer();
}
@@ -3797,17 +3803,19 @@ void Assembler::CheckTrampolinePool() {
for (int i = 0; i < unbound_labels_count_; i++) {
uint32_t imm32;
imm32 = jump_address(&after_pool);
UseScratchRegisterScope temps(this);
Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
{
BlockGrowBufferScope block_buf_growth(this);
// Buffer growth (and relocation) must be blocked for internal
// references until associated instructions are emitted and
// available to be patched.
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED);
lui(at, (imm32 & kHiMask) >> kLuiShift);
ori(at, at, (imm32 & kImm16Mask));
lui(scratch, (imm32 & kHiMask) >> kLuiShift);
ori(scratch, scratch, (imm32 & kImm16Mask));
}
CheckBuffer();
jr(at);
jr(scratch);
nop();
}
}
@@ -3907,6 +3915,25 @@ void Assembler::set_target_value_at(Isolate* isolate, Address pc,
}
}
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
: available_(assembler->GetScratchRegisterList()),
old_available_(*available_) {}
UseScratchRegisterScope::~UseScratchRegisterScope() {
*available_ = old_available_;
}
Register UseScratchRegisterScope::Acquire() {
DCHECK(available_ != nullptr);
DCHECK(*available_ != 0);
int index = static_cast<int>(base::bits::CountTrailingZeros32(*available_));
*available_ &= ~(1UL << index);
return Register::from_code(index);
}
bool UseScratchRegisterScope::hasAvailable() const { return *available_ != 0; }
} // namespace internal
} // namespace v8
@@ -703,6 +703,8 @@ class Assembler : public AssemblerBase {
static constexpr int kTrampolineSlotsSize = 4 * kInstrSize;
#endif
RegList* GetScratchRegisterList() { return &scratch_register_list_; }
// ---------------------------------------------------------------------------
// Code generation.
@@ -1977,6 +1979,8 @@ class Assembler : public AssemblerBase {
inline void CheckBuffer();
RegList scratch_register_list_;
private:
// Avoid overflows for displacements etc.
static const int kMaximalBufferSize = 512 * MB;
@@ -2287,6 +2291,19 @@ class EnsureSpace BASE_EMBEDDED {
}
};
class UseScratchRegisterScope {
public:
explicit UseScratchRegisterScope(Assembler* assembler);
~UseScratchRegisterScope();
Register Acquire();
bool hasAvailable() const;
private:
RegList* available_;
RegList old_available_;
};
} // namespace internal
} // namespace v8
@@ -911,21 +911,27 @@ class MacroAssembler : public TurboAssembler {
void Store(Register src, const MemOperand& dst, Representation r);
void PushRoot(Heap::RootListIndex index) {
LoadRoot(at, index);
Push(at);
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
LoadRoot(scratch, index);
Push(scratch);
}
// Compare the object in a register to a value and jump if they are equal.
void JumpIfRoot(Register with, Heap::RootListIndex index, Label* if_equal) {
LoadRoot(at, index);
Branch(if_equal, eq, with, Operand(at));
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
LoadRoot(scratch, index);
Branch(if_equal, eq, with, Operand(scratch));
}
// Compare the object in a register to a value and jump if they are not equal.
void JumpIfNotRoot(Register with, Heap::RootListIndex index,
Label* if_not_equal) {
LoadRoot(at, index);
Branch(if_not_equal, ne, with, Operand(at));
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
LoadRoot(scratch, index);
Branch(if_not_equal, ne, with, Operand(scratch));
}
// ---------------------------------------------------------------------------
@@ -1445,9 +1451,11 @@ const Operand& rt = Operand(zero_reg), BranchDelaySlot bd = PROTECT
Register src,
Register scratch,
Label* not_a_smi) {
SmiTagCheckOverflow(at, src, scratch);
UseScratchRegisterScope temps(this);
Register scratch1 = temps.Acquire();
SmiTagCheckOverflow(scratch1, src, scratch);
BranchOnOverflow(not_a_smi, scratch);
mov(dst, at);
mov(dst, scratch1);
}
// Test if the register contains a smi.
@@ -1670,23 +1678,24 @@ class CodePatcher {
template <typename Func>
void TurboAssembler::GenerateSwitchTable(Register index, size_t case_count,
Func GetLabelFunction) {
Label here;
BlockTrampolinePoolFor(case_count + kSwitchTablePrologueSize);
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
if (kArchVariant >= kMips32r6) {
BlockTrampolinePoolFor(case_count + kSwitchTablePrologueSize);
addiupc(at, 5);
Lsa(at, at, index, kPointerSizeLog2);
lw(at, MemOperand(at));
addiupc(scratch, 5);
Lsa(scratch, scratch, index, kPointerSizeLog2);
lw(scratch, MemOperand(scratch));
} else {
Label here;
BlockTrampolinePoolFor(case_count + kSwitchTablePrologueSize);
push(ra);
bal(&here);
sll(at, index, kPointerSizeLog2); // Branch delay slot.
sll(scratch, index, kPointerSizeLog2); // Branch delay slot.
bind(&here);
addu(at, at, ra);
addu(scratch, scratch, ra);
pop(ra);
lw(at, MemOperand(at, 6 * v8::internal::Assembler::kInstrSize));
lw(scratch, MemOperand(scratch, 6 * v8::internal::Assembler::kInstrSize));
}
jr(at);
jr(scratch);
nop(); // Branch delay slot nop.
for (size_t index = 0; index < case_count; ++index) {
dd(GetLabelFunction(index));
@@ -295,7 +295,8 @@ const Instr kLwSwInstrArgumentMask = ~kLwSwInstrTypeMask;
const Instr kLwSwOffsetMask = kImm16Mask;
Assembler::Assembler(IsolateData isolate_data, void* buffer, int buffer_size)
: AssemblerBase(isolate_data, buffer, buffer_size) {
: AssemblerBase(isolate_data, buffer, buffer_size),
scratch_register_list_(at.bit()) {
reloc_info_writer.Reposition(buffer_ + buffer_size_, pc_);
last_trampoline_pool_end_ = 0;
@@ -2162,12 +2163,14 @@ void Assembler::AdjustBaseAndOffset(MemOperand& src,
constexpr int32_t kMaxOffsetForSimpleAdjustment =
2 * kMinOffsetForSimpleAdjustment;
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
if (0 <= src.offset() && src.offset() <= kMaxOffsetForSimpleAdjustment) {
daddiu(at, src.rm(), kMinOffsetForSimpleAdjustment);
daddiu(scratch, src.rm(), kMinOffsetForSimpleAdjustment);
src.offset_ -= kMinOffsetForSimpleAdjustment;
} else if (-kMaxOffsetForSimpleAdjustment <= src.offset() &&
src.offset() < 0) {
daddiu(at, src.rm(), -kMinOffsetForSimpleAdjustment);
daddiu(scratch, src.rm(), -kMinOffsetForSimpleAdjustment);
src.offset_ += kMinOffsetForSimpleAdjustment;
} else if (kArchVariant == kMips64r6) {
// On r6 take advantage of the daui instruction, e.g.:
@@ -2190,17 +2193,17 @@ void Assembler::AdjustBaseAndOffset(MemOperand& src,
offset_high++;
overflow_hi16 = (offset_high == -32768);
}
daui(at, src.rm(), static_cast<uint16_t>(offset_high));
daui(scratch, src.rm(), static_cast<uint16_t>(offset_high));
if (overflow_hi16) {
dahi(at, 1);
dahi(scratch, 1);
}
if (two_accesses && !is_int16(static_cast<int32_t>(
offset_low32 + second_access_add_to_offset))) {
// Avoid overflow in the 16-bit offset of the load/store instruction when
// adding 4.
daddiu(at, at, kDoubleSize);
daddiu(scratch, scratch, kDoubleSize);
offset_low32 -= kDoubleSize;
}
@@ -2216,25 +2219,25 @@ void Assembler::AdjustBaseAndOffset(MemOperand& src,
constexpr int32_t kMaxOffsetForMediumAdjustment =
3 * kMinOffsetForSimpleAdjustment;
if (0 <= src.offset() && src.offset() <= kMaxOffsetForMediumAdjustment) {
daddiu(at, src.rm(), kMinOffsetForMediumAdjustment / 2);
daddiu(at, at, kMinOffsetForMediumAdjustment / 2);
daddiu(scratch, src.rm(), kMinOffsetForMediumAdjustment / 2);
daddiu(scratch, scratch, kMinOffsetForMediumAdjustment / 2);
src.offset_ -= kMinOffsetForMediumAdjustment;
} else if (-kMaxOffsetForMediumAdjustment <= src.offset() &&
src.offset() < 0) {
daddiu(at, src.rm(), -kMinOffsetForMediumAdjustment / 2);
daddiu(at, at, -kMinOffsetForMediumAdjustment / 2);
daddiu(scratch, src.rm(), -kMinOffsetForMediumAdjustment / 2);
daddiu(scratch, scratch, -kMinOffsetForMediumAdjustment / 2);
src.offset_ += kMinOffsetForMediumAdjustment;
} else {
// Now that all shorter options have been exhausted, load the full 32-bit
// offset.
int32_t loaded_offset = RoundDown(src.offset(), kDoubleSize);
lui(at, (loaded_offset >> kLuiShift) & kImm16Mask);
ori(at, at, loaded_offset & kImm16Mask); // Load 32-bit offset.
daddu(at, at, src.rm());
lui(scratch, (loaded_offset >> kLuiShift) & kImm16Mask);
ori(scratch, scratch, loaded_offset & kImm16Mask); // Load 32-bit offset.
daddu(scratch, scratch, src.rm());
src.offset_ -= loaded_offset;
}
}
src.rm_ = at;
src.rm_ = scratch;
DCHECK(is_int16(src.offset()));
if (two_accesses) {
@@ -3419,9 +3422,11 @@ MSA_BRANCH_LIST(MSA_BRANCH)
if (is_int10(source.offset())) { \
GenInstrMsaMI10(opcode, source.offset(), source.rm(), wd); \
} else { \
DCHECK(!rs.rm().is(at)); \
daddiu(at, source.rm(), source.offset()); \
GenInstrMsaMI10(opcode, 0, at, wd); \
UseScratchRegisterScope temps(this); \
Register scratch = temps.Acquire(); \
DCHECK(!rs.rm().is(scratch)); \
daddiu(scratch, source.rm(), source.offset()); \
GenInstrMsaMI10(opcode, 0, scratch, wd); \
} \
}
@@ -4218,6 +4223,25 @@ void Assembler::set_target_value_at(Isolate* isolate, Address pc,
}
}
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
: available_(assembler->GetScratchRegisterList()),
old_available_(*available_) {}
UseScratchRegisterScope::~UseScratchRegisterScope() {
*available_ = old_available_;
}
Register UseScratchRegisterScope::Acquire() {
DCHECK(available_ != nullptr);
DCHECK(*available_ != 0);
int index = static_cast<int>(base::bits::CountTrailingZeros32(*available_));
*available_ &= ~(1UL << index);
return Register::from_code(index);
}
bool UseScratchRegisterScope::hasAvailable() const { return *available_ != 0; }
} // namespace internal
} // namespace v8
@@ -700,6 +700,8 @@ class Assembler : public AssemblerBase {
static constexpr int kTrampolineSlotsSize = 2 * kInstrSize;
RegList* GetScratchRegisterList() { return &scratch_register_list_; }
// ---------------------------------------------------------------------------
// Code generation.
@@ -2308,6 +2310,8 @@ class Assembler : public AssemblerBase {
Trampoline trampoline_;
bool internal_trampoline_exception_;
RegList scratch_register_list_;
// The following functions help with avoiding allocations of embedded heap
// objects during the code assembly phase. {RequestHeapObject} records the
// need for a future heap number allocation or code stub generation. After
@@ -2340,6 +2344,19 @@ class EnsureSpace BASE_EMBEDDED {
}
};
class UseScratchRegisterScope {
public:
explicit UseScratchRegisterScope(Assembler* assembler);
~UseScratchRegisterScope();
Register Acquire();
bool hasAvailable() const;
private:
RegList* available_;
RegList old_available_;
};
} // namespace internal
} // namespace v8
@@ -945,21 +945,27 @@ class MacroAssembler : public TurboAssembler {
void Store(Register src, const MemOperand& dst, Representation r);
void PushRoot(Heap::RootListIndex index) {
LoadRoot(at, index);
Push(at);
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
LoadRoot(scratch, index);
Push(scratch);
}
// Compare the object in a register to a value and jump if they are equal.
void JumpIfRoot(Register with, Heap::RootListIndex index, Label* if_equal) {
LoadRoot(at, index);
Branch(if_equal, eq, with, Operand(at));
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
LoadRoot(scratch, index);
Branch(if_equal, eq, with, Operand(scratch));
}
// Compare the object in a register to a value and jump if they are not equal.
void JumpIfNotRoot(Register with, Heap::RootListIndex index,
Label* if_not_equal) {
LoadRoot(at, index);
Branch(if_not_equal, ne, with, Operand(at));
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
LoadRoot(scratch, index);
Branch(if_not_equal, ne, with, Operand(scratch));
}
// ---------------------------------------------------------------------------
@@ -1609,9 +1615,11 @@ const Operand& rt = Operand(zero_reg), BranchDelaySlot bd = PROTECT
if (SmiValuesAre32Bits()) {
SmiTag(dst, src);
} else {
SmiTagCheckOverflow(at, src, scratch);
UseScratchRegisterScope temps(this);
Register scratch1 = temps.Acquire();
SmiTagCheckOverflow(scratch1, src, scratch);
BranchOnOverflow(not_a_smi, scratch);
mov(dst, at);
mov(dst, scratch1);
}
}
@@ -1859,30 +1867,30 @@ void TurboAssembler::GenerateSwitchTable(Register index, size_t case_count,
Func GetLabelFunction) {
// Ensure that dd-ed labels following this instruction use 8 bytes aligned
// addresses.
BlockTrampolinePoolFor(static_cast<int>(case_count) * 2 +
kSwitchTablePrologueSize);
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
if (kArchVariant >= kMips64r6) {
BlockTrampolinePoolFor(static_cast<int>(case_count) * 2 +
kSwitchTablePrologueSize);
// Opposite of Align(8) as we have odd number of instructions in this case.
if ((pc_offset() & 7) == 0) {
nop();
}
addiupc(at, 5);
Dlsa(at, at, index, kPointerSizeLog2);
Ld(at, MemOperand(at));
addiupc(scratch, 5);
Dlsa(scratch, scratch, index, kPointerSizeLog2);
Ld(scratch, MemOperand(scratch));
} else {
Label here;
BlockTrampolinePoolFor(static_cast<int>(case_count) * 2 +
kSwitchTablePrologueSize);
Align(8);
push(ra);
bal(&here);
dsll(at, index, kPointerSizeLog2); // Branch delay slot.
dsll(scratch, index, kPointerSizeLog2); // Branch delay slot.
bind(&here);
daddu(at, at, ra);
daddu(scratch, scratch, ra);
pop(ra);
Ld(at, MemOperand(at, 6 * v8::internal::Assembler::kInstrSize));
Ld(scratch, MemOperand(scratch, 6 * v8::internal::Assembler::kInstrSize));
}
jr(at);
jr(scratch);
nop(); // Branch delay slot nop.
for (size_t index = 0; index < case_count; ++index) {
dd(GetLabelFunction(index));
@@ -1008,8 +1008,8 @@ TEST(min_max_nan) {
auto handle_dnan = [masm](FPURegister dst, Label* nan, Label* back) {
__ bind(nan);
__ LoadRoot(at, Heap::kNanValueRootIndex);
__ Ldc1(dst, FieldMemOperand(at, HeapNumber::kValueOffset));
__ LoadRoot(t8, Heap::kNanValueRootIndex);
__ Ldc1(dst, FieldMemOperand(t8, HeapNumber::kValueOffset));
__ Branch(back);
};
@@ -1501,8 +1501,8 @@ TEST(min_max_nan) {
auto handle_dnan = [masm](FPURegister dst, Label* nan, Label* back) {
__ bind(nan);
__ LoadRoot(at, Heap::kNanValueRootIndex);
__ Ldc1(dst, FieldMemOperand(at, HeapNumber::kValueOffset));
__ LoadRoot(t8, Heap::kNanValueRootIndex);
__ Ldc1(dst, FieldMemOperand(t8, HeapNumber::kValueOffset));
__ Branch(back);
};