Commit 3def7348 authored by Ivica Bogosavljevic, committed by Commit Bot

MIPS: Implement PC relative trampolines

Change-Id: Iecbc7b5b4f8cbea99cb83982d0b5f0db78dfa89e
Reviewed-on: https://chromium-review.googlesource.com/1128964
Commit-Queue: Ivica Bogosavljevic <ivica.bogosavljevic@mips.com>
Reviewed-by: Miran Karić <miran.karic@mips.com>
Cr-Commit-Position: refs/heads/master@{#54429}
parent 71dddd14
......@@ -311,13 +311,6 @@ void Assembler::CheckBuffer() {
}
// Cheap pre-check for the trampoline pool: only calls CheckTrampolinePool()
// once the current pc offset has reached next_buffer_check_, pulled earlier by
// the space needed for |extra_instructions| instructions.
void Assembler::CheckTrampolinePoolQuick(int extra_instructions) {
  const auto check_threshold =
      next_buffer_check_ - extra_instructions * kInstrSize;
  if (pc_offset() < check_threshold) return;
  CheckTrampolinePool();
}
void Assembler::CheckForEmitInForbiddenSlot() {
if (!is_buffer_growth_blocked()) {
CheckBuffer();
......
......@@ -664,6 +664,19 @@ bool Assembler::IsOri(Instr instr) {
return opcode == ORI;
}
// Returns true when |instr| is the register move `or rd, rs, zero_reg`, i.e. a
// SPECIAL-class OR whose rd and rs fields match the given registers and whose
// rt field is 0.
bool Assembler::IsMov(Instr instr, Register rd, Register rs) {
  // Register codes the caller expects to find in the instruction.
  const uint32_t expected_rd = static_cast<uint32_t>(rd.code());
  const uint32_t expected_rs = static_cast<uint32_t>(rs.code());

  // Fields decoded from the instruction word.
  const uint32_t op = GetOpcodeField(instr);
  const uint32_t funct = GetFunctionField(instr);
  const uint32_t actual_rd = GetRd(instr);
  const uint32_t actual_rs = GetRs(instr);
  const uint32_t actual_rt = GetRt(instr);

  // Must be an OR encoded in the SPECIAL opcode class.
  if (op != SPECIAL || funct != OR) return false;
  // Destination and source must be exactly the requested registers.
  if (actual_rd != expected_rd || actual_rs != expected_rs) return false;
  // The second operand must be register 0, which makes the OR a plain move.
  return actual_rt == 0;
}
bool Assembler::IsNop(Instr instr, unsigned int type) {
// See Assembler::nop(type).
......@@ -898,10 +911,38 @@ void Assembler::target_at_put(int32_t pos, int32_t target_pos,
return;
}
DCHECK(IsBranch(instr) || IsLui(instr));
DCHECK(IsBranch(instr) || IsLui(instr) || IsMov(instr, t8, ra));
if (IsBranch(instr)) {
instr = SetBranchOffset(pos, target_pos, instr);
instr_at_put(pos, instr);
} else if (IsMov(instr, t8, ra)) {
Instr instr_lui = instr_at(pos + 4 * Assembler::kInstrSize);
Instr instr_ori = instr_at(pos + 5 * Assembler::kInstrSize);
DCHECK(IsLui(instr_lui));
DCHECK(IsOri(instr_ori));
int32_t imm_short = target_pos - (pos + Assembler::kBranchPCOffset);
if (is_int16(imm_short)) {
// Optimize by converting to regular branch with 16-bit
// offset
Instr instr_b = BEQ;
instr_b = SetBranchOffset(pos, target_pos, instr_b);
instr_at_put(pos, instr_b);
instr_at_put(pos + 1 * Assembler::kInstrSize, 0);
} else {
int32_t imm = target_pos - (pos + Assembler::kLongBranchPCOffset);
DCHECK_EQ(imm & 3, 0);
instr_lui &= ~kImm16Mask;
instr_ori &= ~kImm16Mask;
instr_at_put(pos + 4 * Assembler::kInstrSize,
instr_lui | ((imm >> 16) & kImm16Mask));
instr_at_put(pos + 5 * Assembler::kInstrSize,
instr_ori | (imm & kImm16Mask));
}
} else {
Instr instr1 = instr_at(pos + 0 * Assembler::kInstrSize);
Instr instr2 = instr_at(pos + 1 * Assembler::kInstrSize);
......@@ -3785,49 +3826,37 @@ void Assembler::CheckTrampolinePool() {
bc(&after_pool);
} else {
b(&after_pool);
nop();
}
nop();
int pool_start = pc_offset();
if (IsMipsArchVariant(kMips32r6)) {
for (int i = 0; i < unbound_labels_count_; i++) {
uint32_t imm32;
imm32 = jump_address(&after_pool);
uint32_t lui_offset, jic_offset;
UnpackTargetAddressUnsigned(imm32, lui_offset, jic_offset);
{
BlockGrowBufferScope block_buf_growth(this);
// Buffer growth (and relocation) must be blocked for internal
// references until associated instructions are emitted and
// available to be patched.
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED);
UseScratchRegisterScope temps(this);
Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
lui(scratch, lui_offset);
jic(scratch, jic_offset);
}
CheckBuffer();
}
} else {
for (int i = 0; i < unbound_labels_count_; i++) {
uint32_t imm32;
imm32 = jump_address(&after_pool);
UseScratchRegisterScope temps(this);
Register scratch = temps.hasAvailable() ? temps.Acquire() : t8;
{
BlockGrowBufferScope block_buf_growth(this);
// Buffer growth (and relocation) must be blocked for internal
// references until associated instructions are emitted and
// available to be patched.
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED);
lui(scratch, (imm32 & kHiMask) >> kLuiShift);
ori(scratch, scratch, (imm32 & kImm16Mask));
for (int i = 0; i < unbound_labels_count_; i++) {
{
// printf("Generate trampoline %d\n", i);
// Buffer growth (and relocation) must be blocked for internal
// references until associated instructions are emitted and
// available to be patched.
if (IsMipsArchVariant(kMips32r6)) {
bc(&after_pool);
nop();
} else {
Label find_pc;
or_(t8, ra, zero_reg);
bal(&find_pc);
or_(t9, ra, zero_reg);
bind(&find_pc);
or_(ra, t8, zero_reg);
lui(t8, 0);
ori(t8, t8, 0);
addu(t9, t9, t8);
// Instruction jr will take or_ from the next trampoline
// in its branch delay slot. This is the expected behavior
// in order to decrease size of trampoline pool.
jr(t9);
}
CheckBuffer();
jr(scratch);
nop();
}
}
nop();
bind(&after_pool);
trampoline_ = Trampoline(pool_start, unbound_labels_count_);
......
......@@ -612,6 +612,11 @@ class Assembler : public AssemblerBase {
// Difference between address of current opcode and target address offset.
static constexpr int kBranchPCOffset = 4;
// Difference between address of current opcode and target address offset,
// when we are generating a sequence of instructions for long relative PC
// branches.
static constexpr int kLongBranchPCOffset = 12;
// Here we are patching the address in the LUI/ORI instruction pair.
// These values are used in the serialization process and must be zero for
// MIPS platform, as Code, Embedded Object or External-reference pointers
......@@ -644,11 +649,8 @@ class Assembler : public AssemblerBase {
// Max offset for compact branch instructions with 26-bit offset field
static constexpr int kMaxCompactBranchOffset = (1 << (28 - 1)) - 1;
#ifdef _MIPS_ARCH_MIPS32R6
static constexpr int kTrampolineSlotsSize = 2 * kInstrSize;
#else
static constexpr int kTrampolineSlotsSize = 4 * kInstrSize;
#endif
static constexpr int kTrampolineSlotsSize =
IsMipsArchVariant(kMips32r6) ? 2 * kInstrSize : 8 * kInstrSize;
RegList* GetScratchRegisterList() { return &scratch_register_list_; }
......@@ -1765,6 +1767,7 @@ class Assembler : public AssemblerBase {
static bool IsBeqc(Instr instr);
static bool IsBnec(Instr instr);
static bool IsJicOrJialc(Instr instr);
static bool IsMov(Instr instr, Register rd, Register rs);
static bool IsJump(Instr instr);
static bool IsJ(Instr instr);
......@@ -1881,6 +1884,9 @@ class Assembler : public AssemblerBase {
// Leaves one level of trampoline-pool blocking. When the nesting counter drops
// to zero, a quick trampoline pool check is run with one instruction of slack.
void EndBlockTrampolinePool() {
  if (--trampoline_pool_blocked_nesting_ != 0) return;
  CheckTrampolinePoolQuick(1);
}
bool is_trampoline_pool_blocked() const {
......@@ -1916,7 +1922,11 @@ class Assembler : public AssemblerBase {
}
}
inline void CheckTrampolinePoolQuick(int extra_instructions = 0);
// Cheap pre-check for the trampoline pool: only calls CheckTrampolinePool()
// once the current pc offset has reached next_buffer_check_, pulled earlier by
// the space needed for |extra_instructions| instructions.
inline void CheckTrampolinePoolQuick(int extra_instructions = 0) {
  const auto check_threshold =
      next_buffer_check_ - extra_instructions * kInstrSize;
  if (pc_offset() < check_threshold) return;
  CheckTrampolinePool();
}
inline void CheckBuffer();
......
This diff is collapsed.
......@@ -273,13 +273,6 @@ void Assembler::CheckBuffer() {
}
// Cheap pre-check for the trampoline pool: only calls CheckTrampolinePool()
// once the current pc offset has reached next_buffer_check_, pulled earlier by
// the space needed for |extra_instructions| instructions.
void Assembler::CheckTrampolinePoolQuick(int extra_instructions) {
  const auto check_threshold =
      next_buffer_check_ - extra_instructions * kInstrSize;
  if (pc_offset() < check_threshold) return;
  CheckTrampolinePool();
}
void Assembler::CheckForEmitInForbiddenSlot() {
if (!is_buffer_growth_blocked()) {
CheckBuffer();
......
......@@ -585,6 +585,19 @@ bool Assembler::IsBnec(Instr instr) {
return opcode == POP30 && rs != 0 && rs < rt; // && rt != 0
}
// Returns true when |instr| is the register move `or rd, rs, zero_reg`, i.e. a
// SPECIAL-class OR whose rd and rs fields match the given registers and whose
// rt field is 0.
bool Assembler::IsMov(Instr instr, Register rd, Register rs) {
  // Register codes the caller expects to find in the instruction.
  const uint32_t expected_rd = static_cast<uint32_t>(rd.code());
  const uint32_t expected_rs = static_cast<uint32_t>(rs.code());

  // Fields decoded from the instruction word.
  const uint32_t op = GetOpcodeField(instr);
  const uint32_t funct = GetFunctionField(instr);
  const uint32_t actual_rd = GetRd(instr);
  const uint32_t actual_rs = GetRs(instr);
  const uint32_t actual_rt = GetRt(instr);

  // Must be an OR encoded in the SPECIAL opcode class.
  if (op != SPECIAL || funct != OR) return false;
  // Destination and source must be exactly the requested registers.
  if (actual_rd != expected_rd || actual_rs != expected_rs) return false;
  // The second operand must be register 0, which makes the OR a plain move.
  return actual_rt == 0;
}
bool Assembler::IsJump(Instr instr) {
uint32_t opcode = GetOpcodeField(instr);
......@@ -865,6 +878,34 @@ void Assembler::target_at_put(int pos, int target_pos, bool is_internal) {
instr_ori | ((imm >> 16) & kImm16Mask));
instr_at_put(pos + 3 * Assembler::kInstrSize,
instr_ori2 | (imm & kImm16Mask));
} else if (IsMov(instr, t8, ra)) {
Instr instr_lui = instr_at(pos + 4 * Assembler::kInstrSize);
Instr instr_ori = instr_at(pos + 5 * Assembler::kInstrSize);
DCHECK(IsLui(instr_lui));
DCHECK(IsOri(instr_ori));
int32_t imm_short = target_pos - (pos + Assembler::kBranchPCOffset);
if (is_int16(imm_short)) {
// Optimize by converting to regular branch with 16-bit
// offset
Instr instr_b = BEQ;
instr_b = SetBranchOffset(pos, target_pos, instr_b);
instr_at_put(pos, instr_b);
instr_at_put(pos + 1 * Assembler::kInstrSize, 0);
} else {
int32_t imm = target_pos - (pos + Assembler::kLongBranchPCOffset);
DCHECK_EQ(imm & 3, 0);
instr_lui &= ~kImm16Mask;
instr_ori &= ~kImm16Mask;
instr_at_put(pos + 4 * Assembler::kInstrSize,
instr_lui | ((imm >> 16) & kImm16Mask));
instr_at_put(pos + 5 * Assembler::kInstrSize,
instr_ori | (imm & kImm16Mask));
}
} else if (IsJ(instr) || IsJal(instr)) {
int32_t imm28 = target_pos - pos;
DCHECK_EQ(imm28 & 3, 0);
......@@ -4143,15 +4184,30 @@ void Assembler::CheckTrampolinePool() {
int pool_start = pc_offset();
for (int i = 0; i < unbound_labels_count_; i++) {
{ BlockGrowBufferScope block_buf_growth(this);
// Buffer growth (and relocation) must be blocked for internal
{ // Buffer growth (and relocation) must be blocked for internal
// references until associated instructions are emitted and available
// to be patched.
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED);
j(&after_pool);
if (kArchVariant == kMips64r6) {
bc(&after_pool);
nop();
} else {
Label find_pc;
or_(t8, ra, zero_reg);
bal(&find_pc);
or_(t9, ra, zero_reg);
bind(&find_pc);
or_(ra, t8, zero_reg);
lui(t8, 0);
ori(t8, t8, 0);
daddu(t9, t9, t8);
// Instruction jr will take or_ from the next trampoline
// in its branch delay slot. This is the expected behavior
// in order to decrease size of trampoline pool.
jr(t9);
}
}
nop();
}
nop();
bind(&after_pool);
trampoline_ = Trampoline(pool_start, unbound_labels_count_);
......
......@@ -622,6 +622,11 @@ class Assembler : public AssemblerBase {
// Difference between address of current opcode and target address offset.
static constexpr int kBranchPCOffset = 4;
// Difference between address of current opcode and target address offset,
// when we are generating a sequence of instructions for long relative PC
// branches.
static constexpr int kLongBranchPCOffset = 12;
// Here we are patching the address in the LUI/ORI instruction pair.
// These values are used in the serialization process and must be zero for
// MIPS platform, as Code, Embedded Object or External-reference pointers
......@@ -655,7 +660,8 @@ class Assembler : public AssemblerBase {
// Max offset for compact branch instructions with 26-bit offset field
static constexpr int kMaxCompactBranchOffset = (1 << (28 - 1)) - 1;
static constexpr int kTrampolineSlotsSize = 2 * kInstrSize;
static constexpr int kTrampolineSlotsSize =
kArchVariant == kMips64r6 ? 2 * kInstrSize : 8 * kInstrSize;
RegList* GetScratchRegisterList() { return &scratch_register_list_; }
......@@ -1845,6 +1851,7 @@ class Assembler : public AssemblerBase {
static bool IsJ(Instr instr);
static bool IsLui(Instr instr);
static bool IsOri(Instr instr);
static bool IsMov(Instr instr, Register rd, Register rs);
static bool IsJal(Instr instr);
static bool IsJr(Instr instr);
......@@ -1950,6 +1957,9 @@ class Assembler : public AssemblerBase {
// Leaves one level of trampoline-pool blocking. When the nesting counter drops
// to zero, a quick trampoline pool check is run with one instruction of slack.
void EndBlockTrampolinePool() {
  if (--trampoline_pool_blocked_nesting_ != 0) return;
  CheckTrampolinePoolQuick(1);
}
bool is_trampoline_pool_blocked() const {
......@@ -1985,7 +1995,11 @@ class Assembler : public AssemblerBase {
}
}
inline void CheckTrampolinePoolQuick(int extra_instructions = 0);
// Cheap pre-check for the trampoline pool: only calls CheckTrampolinePool()
// once the current pc offset has reached next_buffer_check_, pulled earlier by
// the space needed for |extra_instructions| instructions.
void CheckTrampolinePoolQuick(int extra_instructions = 0) {
  const auto check_threshold =
      next_buffer_check_ - extra_instructions * kInstrSize;
  if (pc_offset() < check_threshold) return;
  CheckTrampolinePool();
}
private:
// Avoid overflows for displacements etc.
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment