Commit 60b97e80 authored by Djordje.Pesic's avatar Djordje.Pesic Committed by Commit bot

MIPS: JALR to JAL optimization removal

Review URL: https://codereview.chromium.org/1419793014

Cr-Commit-Position: refs/heads/master@{#31917}
parent 45787501
......@@ -88,14 +88,6 @@ bool Operand::is_reg() const {
// RelocInfo.
void RelocInfo::apply(intptr_t delta) {
if (IsCodeTarget(rmode_)) {
uint32_t scope1 = (uint32_t) target_address() & ~kImm28Mask;
uint32_t scope2 = reinterpret_cast<uint32_t>(pc_) & ~kImm28Mask;
if (scope1 != scope2) {
Assembler::JumpToJumpRegister(pc_);
}
}
if (IsInternalReference(rmode_) || IsInternalReferenceEncoded(rmode_)) {
// Absolute code pointer inside code object moves with the code object.
byte* p = reinterpret_cast<byte*>(pc_);
......
......@@ -3024,130 +3024,12 @@ void Assembler::set_target_address_at(Address pc,
*p = LUI | rt_code | ((itarget & kHiMask) >> kLuiShift);
*(p + 1) = ORI | rt_code | (rt_code << 5) | (itarget & kImm16Mask);
// The following code is an optimization for the common case of Call()
// or Jump() which is load to register, and jump through register:
// li(t9, address); jalr(t9) (or jr(t9)).
// If the destination address is in the same 256 MB page as the call, it
// is faster to do a direct jal, or j, rather than jump thru register, since
// that lets the cpu pipeline prefetch the target address. However each
// time the address above is patched, we have to patch the direct jal/j
// instruction, as well as possibly revert to jalr/jr if we now cross a
// 256 MB page. Note that with the jal/j instructions, we do not need to
// load the register, but that code is left, since it makes it easy to
// revert this process. A further optimization could try replacing the
// li sequence with nops.
// This optimization can only be applied if the rt-code from instr2 is the
// register used for the jalr/jr. Finally, we have to skip 'jr ra', which is
// mips return. Occasionally this lands after an li().
Instr instr3 = instr_at(pc + 2 * kInstrSize);
uint32_t ipc = reinterpret_cast<uint32_t>(pc + 3 * kInstrSize);
bool in_range = ((ipc ^ itarget) >> (kImm26Bits + kImmFieldShift)) == 0;
uint32_t target_field =
static_cast<uint32_t>(itarget & kJumpAddrMask) >> kImmFieldShift;
bool patched_jump = false;
#ifndef ALLOW_JAL_IN_BOUNDARY_REGION
// This is a workaround to the 24k core E156 bug (affect some 34k cores also).
// Since the excluded space is only 64KB out of 256MB (0.02 %), we will just
// apply this workaround for all cores so we don't have to identify the core.
if (in_range) {
// The 24k core E156 bug has some very specific requirements, we only check
// the most simple one: if the address of the delay slot instruction is in
// the first or last 32 KB of the 256 MB segment.
uint32_t segment_mask = ((256 * MB) - 1) ^ ((32 * KB) - 1);
uint32_t ipc_segment_addr = ipc & segment_mask;
if (ipc_segment_addr == 0 || ipc_segment_addr == segment_mask)
in_range = false;
}
#endif
if (IsJalr(instr3)) {
// Try to convert JALR to JAL.
if (in_range && GetRt(instr2) == GetRs(instr3)) {
*(p + 2) = JAL | target_field;
patched_jump = true;
}
} else if (IsJr(instr3)) {
// Try to convert JR to J, skip returns (jr ra).
bool is_ret = static_cast<int>(GetRs(instr3)) == ra.code();
if (in_range && !is_ret && GetRt(instr2) == GetRs(instr3)) {
*(p + 2) = J | target_field;
patched_jump = true;
}
} else if (IsJal(instr3)) {
if (in_range) {
// We are patching an already converted JAL.
*(p + 2) = JAL | target_field;
} else {
// Patch JAL, but out of range, revert to JALR.
// JALR rs reg is the rt reg specified in the ORI instruction.
uint32_t rs_field = GetRt(instr2) << kRsShift;
uint32_t rd_field = ra.code() << kRdShift; // Return-address (ra) reg.
*(p+2) = SPECIAL | rs_field | rd_field | JALR;
}
patched_jump = true;
} else if (IsJ(instr3)) {
if (in_range) {
// We are patching an already converted J (jump).
*(p + 2) = J | target_field;
} else {
// Trying patch J, but out of range, just go back to JR.
// JR 'rs' reg is the 'rt' reg specified in the ORI instruction (instr2).
uint32_t rs_field = GetRt(instr2) << kRsShift;
if (IsMipsArchVariant(kMips32r6)) {
*(p + 2) = SPECIAL | rs_field | (zero_reg.code() << kRdShift) | JALR;
} else {
*(p + 2) = SPECIAL | rs_field | JR;
}
}
patched_jump = true;
}
if (icache_flush_mode != SKIP_ICACHE_FLUSH) {
CpuFeatures::FlushICache(pc, (patched_jump ? 3 : 2) * sizeof(int32_t));
CpuFeatures::FlushICache(pc, 2 * sizeof(int32_t));
}
}
void Assembler::JumpToJumpRegister(Address pc) {
  // Reverts a previously applied direct-jump optimization: if the two-word
  // li sequence at pc (lui/ori) is followed by a patched direct jal/j at
  // pc + 2 * kInstrSize, rewrite that third instruction back into the
  // register form (jalr/jr) so the jump target is taken from the register
  // loaded by the li sequence. Used when code moves across a 256 MB segment
  // and the direct-jump immediate would no longer reach the target.
  //
  // pc must point at the lui instruction of the lui/ori pair.
  uint32_t* p = reinterpret_cast<uint32_t*>(pc);
#ifdef DEBUG
  Instr instr1 = instr_at(pc);
#endif
  Instr instr2 = instr_at(pc + 1 * kInstrSize);
  Instr instr3 = instr_at(pc + 2 * kInstrSize);
  bool patched = false;

  if (IsJal(instr3)) {
    // jal -> jalr. The jump register is the rt register of the ori, and the
    // return address goes to ra, matching jal's implicit link register.
    DCHECK(GetOpcodeField(instr1) == LUI);
    DCHECK(GetOpcodeField(instr2) == ORI);

    uint32_t rs_field = GetRt(instr2) << kRsShift;
    uint32_t rd_field = ra.code() << kRdShift;  // Return-address (ra) reg.
    *(p + 2) = SPECIAL | rs_field | rd_field | JALR;
    patched = true;
  } else if (IsJ(instr3)) {
    // j -> jr. On r6 the JR encoding was removed, so emit "jalr zero, rs",
    // which discards the link and behaves as a plain register jump.
    DCHECK(GetOpcodeField(instr1) == LUI);
    DCHECK(GetOpcodeField(instr2) == ORI);

    uint32_t rs_field = GetRt(instr2) << kRsShift;
    if (IsMipsArchVariant(kMips32r6)) {
      *(p + 2) = SPECIAL | rs_field | (zero_reg.code() << kRdShift) | JALR;
    } else {
      *(p + 2) = SPECIAL | rs_field | JR;
    }
    patched = true;
  }

  if (patched) {
    // Flush exactly the rewritten instruction. The previous code flushed
    // starting at pc + 2 (a 2-*byte* offset), which missed the patched word
    // at byte offset 2 * kInstrSize and only worked because icache flushes
    // round to cache-line granularity.
    CpuFeatures::FlushICache(pc + 2 * kInstrSize, sizeof(Instr));
  }
}
} // namespace internal
} // namespace v8
......
......@@ -487,8 +487,6 @@ class Assembler : public AssemblerBase {
// of that call in the instruction stream.
inline static Address target_address_from_return_address(Address pc);
static void JumpToJumpRegister(Address pc);
static void QuietNaN(HeapObject* nan);
// This sets the branch destination (which gets loaded at the call address).
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment