Commit 3def7348 authored by Ivica Bogosavljevic, committed by Commit Bot

MIPS: Implement PC relative trampolines

Change-Id: Iecbc7b5b4f8cbea99cb83982d0b5f0db78dfa89e
Reviewed-on: https://chromium-review.googlesource.com/1128964
Commit-Queue: Ivica Bogosavljevic <ivica.bogosavljevic@mips.com>
Reviewed-by: Miran Karić <miran.karic@mips.com>
Cr-Commit-Position: refs/heads/master@{#54429}
parent 71dddd14
...@@ -311,13 +311,6 @@ void Assembler::CheckBuffer() { ...@@ -311,13 +311,6 @@ void Assembler::CheckBuffer() {
} }
// Cheap pre-check for trampoline pool emission: runs the full
// CheckTrampolinePool() only once the current pc offset has reached the next
// check point, moved earlier by |extra_instructions| instruction slots.
void Assembler::CheckTrampolinePoolQuick(int extra_instructions) {
  const auto check_threshold =
      next_buffer_check_ - extra_instructions * kInstrSize;
  if (pc_offset() < check_threshold) return;
  CheckTrampolinePool();
}
void Assembler::CheckForEmitInForbiddenSlot() { void Assembler::CheckForEmitInForbiddenSlot() {
if (!is_buffer_growth_blocked()) { if (!is_buffer_growth_blocked()) {
CheckBuffer(); CheckBuffer();
......
...@@ -664,6 +664,19 @@ bool Assembler::IsOri(Instr instr) { ...@@ -664,6 +664,19 @@ bool Assembler::IsOri(Instr instr) {
return opcode == ORI; return opcode == ORI;
} }
// Returns true when |instr| is an OR instruction whose destination field
// matches |rd|, whose first source field matches |rs| and whose second source
// field is 0 (the zero_reg argument), i.e. the "mov rd, rs" idiom.
bool Assembler::IsMov(Instr instr, Register rd, Register rs) {
  // OR is encoded under the SPECIAL opcode and selected by the function field.
  const uint32_t opcode_bits = GetOpcodeField(instr);
  const uint32_t function_bits = GetFunctionField(instr);
  if (opcode_bits != SPECIAL || function_bits != OR) return false;

  // The rt operand has to be register number 0 for the OR to act as a move.
  const uint32_t rt_bits = GetRt(instr);
  if (rt_bits != 0) return false;

  // Finally the encoded registers must be exactly the requested ones.
  const uint32_t wanted_rd = static_cast<uint32_t>(rd.code());
  const uint32_t wanted_rs = static_cast<uint32_t>(rs.code());
  const uint32_t rd_bits = GetRd(instr);
  const uint32_t rs_bits = GetRs(instr);
  return rd_bits == wanted_rd && rs_bits == wanted_rs;
}
bool Assembler::IsNop(Instr instr, unsigned int type) { bool Assembler::IsNop(Instr instr, unsigned int type) {
// See Assembler::nop(type). // See Assembler::nop(type).
...@@ -898,10 +911,38 @@ void Assembler::target_at_put(int32_t pos, int32_t target_pos, ...@@ -898,10 +911,38 @@ void Assembler::target_at_put(int32_t pos, int32_t target_pos,
return; return;
} }
DCHECK(IsBranch(instr) || IsLui(instr)); DCHECK(IsBranch(instr) || IsLui(instr) || IsMov(instr, t8, ra));
if (IsBranch(instr)) { if (IsBranch(instr)) {
instr = SetBranchOffset(pos, target_pos, instr); instr = SetBranchOffset(pos, target_pos, instr);
instr_at_put(pos, instr); instr_at_put(pos, instr);
} else if (IsMov(instr, t8, ra)) {
Instr instr_lui = instr_at(pos + 4 * Assembler::kInstrSize);
Instr instr_ori = instr_at(pos + 5 * Assembler::kInstrSize);
DCHECK(IsLui(instr_lui));
DCHECK(IsOri(instr_ori));
int32_t imm_short = target_pos - (pos + Assembler::kBranchPCOffset);
if (is_int16(imm_short)) {
// Optimize by converting to regular branch with 16-bit
// offset
Instr instr_b = BEQ;
instr_b = SetBranchOffset(pos, target_pos, instr_b);
instr_at_put(pos, instr_b);
instr_at_put(pos + 1 * Assembler::kInstrSize, 0);
} else {
int32_t imm = target_pos - (pos + Assembler::kLongBranchPCOffset);
DCHECK_EQ(imm & 3, 0);
instr_lui &= ~kImm16Mask;
instr_ori &= ~kImm16Mask;
instr_at_put(pos + 4 * Assembler::kInstrSize,
instr_lui | ((imm >> 16) & kImm16Mask));
instr_at_put(pos + 5 * Assembler::kInstrSize,
instr_ori | (imm & kImm16Mask));
}
} else { } else {
Instr instr1 = instr_at(pos + 0 * Assembler::kInstrSize); Instr instr1 = instr_at(pos + 0 * Assembler::kInstrSize);
Instr instr2 = instr_at(pos + 1 * Assembler::kInstrSize); Instr instr2 = instr_at(pos + 1 * Assembler::kInstrSize);
...@@ -3785,49 +3826,37 @@ void Assembler::CheckTrampolinePool() { ...@@ -3785,49 +3826,37 @@ void Assembler::CheckTrampolinePool() {
bc(&after_pool); bc(&after_pool);
} else { } else {
b(&after_pool); b(&after_pool);
nop();
} }
nop();
int pool_start = pc_offset(); int pool_start = pc_offset();
if (IsMipsArchVariant(kMips32r6)) {
for (int i = 0; i < unbound_labels_count_; i++) { for (int i = 0; i < unbound_labels_count_; i++) {
uint32_t imm32;
imm32 = jump_address(&after_pool);
uint32_t lui_offset, jic_offset;
UnpackTargetAddressUnsigned(imm32, lui_offset, jic_offset);
{ {
BlockGrowBufferScope block_buf_growth(this); // printf("Generate trampoline %d\n", i);
// Buffer growth (and relocation) must be blocked for internal // Buffer growth (and relocation) must be blocked for internal
// references until associated instructions are emitted and // references until associated instructions are emitted and
// available to be patched. // available to be patched.
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED); if (IsMipsArchVariant(kMips32r6)) {
UseScratchRegisterScope temps(this); bc(&after_pool);
Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; nop();
lui(scratch, lui_offset);
jic(scratch, jic_offset);
}
CheckBuffer();
}
} else { } else {
for (int i = 0; i < unbound_labels_count_; i++) { Label find_pc;
uint32_t imm32; or_(t8, ra, zero_reg);
imm32 = jump_address(&after_pool); bal(&find_pc);
UseScratchRegisterScope temps(this); or_(t9, ra, zero_reg);
Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; bind(&find_pc);
{ or_(ra, t8, zero_reg);
BlockGrowBufferScope block_buf_growth(this); lui(t8, 0);
// Buffer growth (and relocation) must be blocked for internal ori(t8, t8, 0);
// references until associated instructions are emitted and addu(t9, t9, t8);
// available to be patched. // Instruction jr will take or_ from the next trampoline.
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED); // in its branch delay slot. This is the expected behavior
lui(scratch, (imm32 & kHiMask) >> kLuiShift); // in order to decrease size of trampoline pool.
ori(scratch, scratch, (imm32 & kImm16Mask)); jr(t9);
} }
CheckBuffer();
jr(scratch);
nop();
} }
} }
nop();
bind(&after_pool); bind(&after_pool);
trampoline_ = Trampoline(pool_start, unbound_labels_count_); trampoline_ = Trampoline(pool_start, unbound_labels_count_);
......
...@@ -612,6 +612,11 @@ class Assembler : public AssemblerBase { ...@@ -612,6 +612,11 @@ class Assembler : public AssemblerBase {
// Difference between address of current opcode and target address offset. // Difference between address of current opcode and target address offset.
static constexpr int kBranchPCOffset = 4; static constexpr int kBranchPCOffset = 4;
// Difference between address of current opcode and target address offset,
// when we are generating a sequence of instructions for long relative PC
// branches.
static constexpr int kLongBranchPCOffset = 12;
// Here we are patching the address in the LUI/ORI instruction pair. // Here we are patching the address in the LUI/ORI instruction pair.
// These values are used in the serialization process and must be zero for // These values are used in the serialization process and must be zero for
// MIPS platform, as Code, Embedded Object or External-reference pointers // MIPS platform, as Code, Embedded Object or External-reference pointers
...@@ -644,11 +649,8 @@ class Assembler : public AssemblerBase { ...@@ -644,11 +649,8 @@ class Assembler : public AssemblerBase {
// Max offset for compact branch instructions with 26-bit offset field // Max offset for compact branch instructions with 26-bit offset field
static constexpr int kMaxCompactBranchOffset = (1 << (28 - 1)) - 1; static constexpr int kMaxCompactBranchOffset = (1 << (28 - 1)) - 1;
#ifdef _MIPS_ARCH_MIPS32R6 static constexpr int kTrampolineSlotsSize =
static constexpr int kTrampolineSlotsSize = 2 * kInstrSize; IsMipsArchVariant(kMips32r6) ? 2 * kInstrSize : 8 * kInstrSize;
#else
static constexpr int kTrampolineSlotsSize = 4 * kInstrSize;
#endif
RegList* GetScratchRegisterList() { return &scratch_register_list_; } RegList* GetScratchRegisterList() { return &scratch_register_list_; }
...@@ -1765,6 +1767,7 @@ class Assembler : public AssemblerBase { ...@@ -1765,6 +1767,7 @@ class Assembler : public AssemblerBase {
static bool IsBeqc(Instr instr); static bool IsBeqc(Instr instr);
static bool IsBnec(Instr instr); static bool IsBnec(Instr instr);
static bool IsJicOrJialc(Instr instr); static bool IsJicOrJialc(Instr instr);
static bool IsMov(Instr instr, Register rd, Register rs);
static bool IsJump(Instr instr); static bool IsJump(Instr instr);
static bool IsJ(Instr instr); static bool IsJ(Instr instr);
...@@ -1881,6 +1884,9 @@ class Assembler : public AssemblerBase { ...@@ -1881,6 +1884,9 @@ class Assembler : public AssemblerBase {
void EndBlockTrampolinePool() { void EndBlockTrampolinePool() {
trampoline_pool_blocked_nesting_--; trampoline_pool_blocked_nesting_--;
if (trampoline_pool_blocked_nesting_ == 0) {
CheckTrampolinePoolQuick(1);
}
} }
bool is_trampoline_pool_blocked() const { bool is_trampoline_pool_blocked() const {
...@@ -1916,7 +1922,11 @@ class Assembler : public AssemblerBase { ...@@ -1916,7 +1922,11 @@ class Assembler : public AssemblerBase {
} }
} }
inline void CheckTrampolinePoolQuick(int extra_instructions = 0); inline void CheckTrampolinePoolQuick(int extra_instructions = 0) {
if (pc_offset() >= next_buffer_check_ - extra_instructions * kInstrSize) {
CheckTrampolinePool();
}
}
inline void CheckBuffer(); inline void CheckBuffer();
......
This diff is collapsed.
...@@ -273,13 +273,6 @@ void Assembler::CheckBuffer() { ...@@ -273,13 +273,6 @@ void Assembler::CheckBuffer() {
} }
// Cheap pre-check for trampoline pool emission: runs the full
// CheckTrampolinePool() only once the current pc offset has reached the next
// check point, moved earlier by |extra_instructions| instruction slots.
void Assembler::CheckTrampolinePoolQuick(int extra_instructions) {
  const auto check_threshold =
      next_buffer_check_ - extra_instructions * kInstrSize;
  if (pc_offset() < check_threshold) return;
  CheckTrampolinePool();
}
void Assembler::CheckForEmitInForbiddenSlot() { void Assembler::CheckForEmitInForbiddenSlot() {
if (!is_buffer_growth_blocked()) { if (!is_buffer_growth_blocked()) {
CheckBuffer(); CheckBuffer();
......
...@@ -585,6 +585,19 @@ bool Assembler::IsBnec(Instr instr) { ...@@ -585,6 +585,19 @@ bool Assembler::IsBnec(Instr instr) {
return opcode == POP30 && rs != 0 && rs < rt; // && rt != 0 return opcode == POP30 && rs != 0 && rs < rt; // && rt != 0
} }
// Returns true when |instr| is an OR instruction whose destination field
// matches |rd|, whose first source field matches |rs| and whose second source
// field is 0 (the zero_reg argument), i.e. the "mov rd, rs" idiom.
bool Assembler::IsMov(Instr instr, Register rd, Register rs) {
  // OR is encoded under the SPECIAL opcode and selected by the function field.
  const uint32_t opcode_bits = GetOpcodeField(instr);
  const uint32_t function_bits = GetFunctionField(instr);
  if (opcode_bits != SPECIAL || function_bits != OR) return false;

  // The rt operand has to be register number 0 for the OR to act as a move.
  const uint32_t rt_bits = GetRt(instr);
  if (rt_bits != 0) return false;

  // Finally the encoded registers must be exactly the requested ones.
  const uint32_t wanted_rd = static_cast<uint32_t>(rd.code());
  const uint32_t wanted_rs = static_cast<uint32_t>(rs.code());
  const uint32_t rd_bits = GetRd(instr);
  const uint32_t rs_bits = GetRs(instr);
  return rd_bits == wanted_rd && rs_bits == wanted_rs;
}
bool Assembler::IsJump(Instr instr) { bool Assembler::IsJump(Instr instr) {
uint32_t opcode = GetOpcodeField(instr); uint32_t opcode = GetOpcodeField(instr);
...@@ -865,6 +878,34 @@ void Assembler::target_at_put(int pos, int target_pos, bool is_internal) { ...@@ -865,6 +878,34 @@ void Assembler::target_at_put(int pos, int target_pos, bool is_internal) {
instr_ori | ((imm >> 16) & kImm16Mask)); instr_ori | ((imm >> 16) & kImm16Mask));
instr_at_put(pos + 3 * Assembler::kInstrSize, instr_at_put(pos + 3 * Assembler::kInstrSize,
instr_ori2 | (imm & kImm16Mask)); instr_ori2 | (imm & kImm16Mask));
} else if (IsMov(instr, t8, ra)) {
Instr instr_lui = instr_at(pos + 4 * Assembler::kInstrSize);
Instr instr_ori = instr_at(pos + 5 * Assembler::kInstrSize);
DCHECK(IsLui(instr_lui));
DCHECK(IsOri(instr_ori));
int32_t imm_short = target_pos - (pos + Assembler::kBranchPCOffset);
if (is_int16(imm_short)) {
// Optimize by converting to regular branch with 16-bit
// offset
Instr instr_b = BEQ;
instr_b = SetBranchOffset(pos, target_pos, instr_b);
instr_at_put(pos, instr_b);
instr_at_put(pos + 1 * Assembler::kInstrSize, 0);
} else {
int32_t imm = target_pos - (pos + Assembler::kLongBranchPCOffset);
DCHECK_EQ(imm & 3, 0);
instr_lui &= ~kImm16Mask;
instr_ori &= ~kImm16Mask;
instr_at_put(pos + 4 * Assembler::kInstrSize,
instr_lui | ((imm >> 16) & kImm16Mask));
instr_at_put(pos + 5 * Assembler::kInstrSize,
instr_ori | (imm & kImm16Mask));
}
} else if (IsJ(instr) || IsJal(instr)) { } else if (IsJ(instr) || IsJal(instr)) {
int32_t imm28 = target_pos - pos; int32_t imm28 = target_pos - pos;
DCHECK_EQ(imm28 & 3, 0); DCHECK_EQ(imm28 & 3, 0);
...@@ -4143,15 +4184,30 @@ void Assembler::CheckTrampolinePool() { ...@@ -4143,15 +4184,30 @@ void Assembler::CheckTrampolinePool() {
int pool_start = pc_offset(); int pool_start = pc_offset();
for (int i = 0; i < unbound_labels_count_; i++) { for (int i = 0; i < unbound_labels_count_; i++) {
{ BlockGrowBufferScope block_buf_growth(this); { // Buffer growth (and relocation) must be blocked for internal
// Buffer growth (and relocation) must be blocked for internal
// references until associated instructions are emitted and available // references until associated instructions are emitted and available
// to be patched. // to be patched.
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED); if (kArchVariant == kMips64r6) {
j(&after_pool); bc(&after_pool);
}
nop(); nop();
} else {
Label find_pc;
or_(t8, ra, zero_reg);
bal(&find_pc);
or_(t9, ra, zero_reg);
bind(&find_pc);
or_(ra, t8, zero_reg);
lui(t8, 0);
ori(t8, t8, 0);
daddu(t9, t9, t8);
// Instruction jr will take or_ from the next trampoline
// in its branch delay slot. This is the expected behavior
// in order to decrease the size of the trampoline pool.
jr(t9);
}
} }
}
nop();
bind(&after_pool); bind(&after_pool);
trampoline_ = Trampoline(pool_start, unbound_labels_count_); trampoline_ = Trampoline(pool_start, unbound_labels_count_);
......
...@@ -622,6 +622,11 @@ class Assembler : public AssemblerBase { ...@@ -622,6 +622,11 @@ class Assembler : public AssemblerBase {
// Difference between address of current opcode and target address offset. // Difference between address of current opcode and target address offset.
static constexpr int kBranchPCOffset = 4; static constexpr int kBranchPCOffset = 4;
// Difference between address of current opcode and target address offset,
// when we are generating a sequence of instructions for long relative PC
// branches.
static constexpr int kLongBranchPCOffset = 12;
// Here we are patching the address in the LUI/ORI instruction pair. // Here we are patching the address in the LUI/ORI instruction pair.
// These values are used in the serialization process and must be zero for // These values are used in the serialization process and must be zero for
// MIPS platform, as Code, Embedded Object or External-reference pointers // MIPS platform, as Code, Embedded Object or External-reference pointers
...@@ -655,7 +660,8 @@ class Assembler : public AssemblerBase { ...@@ -655,7 +660,8 @@ class Assembler : public AssemblerBase {
// Max offset for compact branch instructions with 26-bit offset field // Max offset for compact branch instructions with 26-bit offset field
static constexpr int kMaxCompactBranchOffset = (1 << (28 - 1)) - 1; static constexpr int kMaxCompactBranchOffset = (1 << (28 - 1)) - 1;
static constexpr int kTrampolineSlotsSize = 2 * kInstrSize; static constexpr int kTrampolineSlotsSize =
kArchVariant == kMips64r6 ? 2 * kInstrSize : 8 * kInstrSize;
RegList* GetScratchRegisterList() { return &scratch_register_list_; } RegList* GetScratchRegisterList() { return &scratch_register_list_; }
...@@ -1845,6 +1851,7 @@ class Assembler : public AssemblerBase { ...@@ -1845,6 +1851,7 @@ class Assembler : public AssemblerBase {
static bool IsJ(Instr instr); static bool IsJ(Instr instr);
static bool IsLui(Instr instr); static bool IsLui(Instr instr);
static bool IsOri(Instr instr); static bool IsOri(Instr instr);
static bool IsMov(Instr instr, Register rd, Register rs);
static bool IsJal(Instr instr); static bool IsJal(Instr instr);
static bool IsJr(Instr instr); static bool IsJr(Instr instr);
...@@ -1950,6 +1957,9 @@ class Assembler : public AssemblerBase { ...@@ -1950,6 +1957,9 @@ class Assembler : public AssemblerBase {
void EndBlockTrampolinePool() { void EndBlockTrampolinePool() {
trampoline_pool_blocked_nesting_--; trampoline_pool_blocked_nesting_--;
if (trampoline_pool_blocked_nesting_ == 0) {
CheckTrampolinePoolQuick(1);
}
} }
bool is_trampoline_pool_blocked() const { bool is_trampoline_pool_blocked() const {
...@@ -1985,7 +1995,11 @@ class Assembler : public AssemblerBase { ...@@ -1985,7 +1995,11 @@ class Assembler : public AssemblerBase {
} }
} }
inline void CheckTrampolinePoolQuick(int extra_instructions = 0); void CheckTrampolinePoolQuick(int extra_instructions = 0) {
if (pc_offset() >= next_buffer_check_ - extra_instructions * kInstrSize) {
CheckTrampolinePool();
}
}
private: private:
// Avoid overflows for displacements etc. // Avoid overflows for displacements etc.
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment