Commit 962703ca authored by dusan.milosavljevic's avatar dusan.milosavljevic Committed by Commit bot

MIPS64: Improve long branches utilizing code range.

Improves code size of generated regexp in TestSizeOfRegExpCode test by 33%.

Execution time of the same test improved by ~10%.

Utilizing the code range for mips64 enables us to use J/JAL
instructions for long branches.

TEST=cctest/test-heap/TestSizeOfRegExpCode
BUG=

Review URL: https://codereview.chromium.org/1147503002

Cr-Commit-Position: refs/heads/master@{#28867}
parent fbe973ff
......@@ -148,7 +148,14 @@ const int kPointerSizeLog2 = 3;
const intptr_t kIntptrSignBit = V8_INT64_C(0x8000000000000000);
const uintptr_t kUintptrAllBitsSet = V8_UINT64_C(0xFFFFFFFFFFFFFFFF);
const bool kRequiresCodeRange = true;
#if V8_TARGET_ARCH_MIPS64
// To use pseudo-relative jumps such as j/jal instructions which have 28-bit
// encoded immediate, the addresses have to be in range of 256MB aligned
// region. Used only for large object space.
const size_t kMaximalCodeRangeSize = 256 * MB;
#else
const size_t kMaximalCodeRangeSize = 512 * MB;
#endif
#if V8_OS_WIN
const size_t kMinimumCodeRangeSize = 4 * MB;
const size_t kReservedCodeRangePages = 1;
......
......@@ -114,7 +114,14 @@ bool CodeRange::SetUp(size_t requested) {
}
DCHECK(!kRequiresCodeRange || requested <= kMaximalCodeRangeSize);
#ifdef V8_TARGET_ARCH_MIPS64
// To use pseudo-relative jumps such as j/jal instructions which have 28-bit
// encoded immediate, the addresses have to be in range of 256MB aligned
// region.
code_range_ = new base::VirtualMemory(requested, kMaximalCodeRangeSize);
#else
code_range_ = new base::VirtualMemory(requested);
#endif
CHECK(code_range_ != NULL);
if (!code_range_->IsReserved()) {
delete code_range_;
......@@ -645,7 +652,14 @@ MemoryChunk* MemoryAllocator::AllocateChunk(intptr_t reserve_area_size,
base::OS::CommitPageSize());
// Allocate executable memory either from code range or from the
// OS.
#ifdef V8_TARGET_ARCH_MIPS64
// Use code range only for large object space on mips64 to keep address
// range within 256-MB memory region.
if (isolate_->code_range() != NULL && isolate_->code_range()->valid() &&
commit_area_size > CodePageAreaSize()) {
#else
if (isolate_->code_range() != NULL && isolate_->code_range()->valid()) {
#endif
base = isolate_->code_range()->AllocateRawMemory(chunk_size, commit_size,
&chunk_size);
DCHECK(
......
......@@ -196,24 +196,17 @@ Address Assembler::break_address_from_return_address(Address pc) {
void Assembler::set_target_internal_reference_encoded_at(Address pc,
Address target) {
// Encoded internal references are lui/ori load of 48-bit absolute address.
Instr instr_lui = Assembler::instr_at(pc + 0 * Assembler::kInstrSize);
Instr instr_ori = Assembler::instr_at(pc + 1 * Assembler::kInstrSize);
Instr instr_ori2 = Assembler::instr_at(pc + 3 * Assembler::kInstrSize);
DCHECK(Assembler::IsLui(instr_lui));
DCHECK(Assembler::IsOri(instr_ori));
DCHECK(Assembler::IsOri(instr_ori2));
instr_lui &= ~kImm16Mask;
instr_ori &= ~kImm16Mask;
instr_ori2 &= ~kImm16Mask;
int64_t imm = reinterpret_cast<int64_t>(target);
DCHECK((imm & 3) == 0);
Assembler::instr_at_put(pc + 0 * Assembler::kInstrSize,
instr_lui | ((imm >> 32) & kImm16Mask));
Assembler::instr_at_put(pc + 1 * Assembler::kInstrSize,
instr_ori | ((imm >> 16) & kImm16Mask));
Assembler::instr_at_put(pc + 3 * Assembler::kInstrSize,
instr_ori | (imm & kImm16Mask));
// Encoded internal references are j/jal instructions.
Instr instr = Assembler::instr_at(pc + 0 * Assembler::kInstrSize);
uint64_t imm28 =
(reinterpret_cast<uint64_t>(target) & static_cast<uint64_t>(kImm28Mask));
instr &= ~kImm26Mask;
uint64_t imm26 = imm28 >> 2;
DCHECK(is_uint26(imm26));
instr_at_put(pc, instr | (imm26 & kImm26Mask));
// Currently used only by deserializer, and all code will be flushed
// after complete deserialization, no need to flush on each reference.
}
......@@ -222,7 +215,7 @@ void Assembler::set_target_internal_reference_encoded_at(Address pc,
void Assembler::deserialization_set_target_internal_reference_at(
Address pc, Address target, RelocInfo::Mode mode) {
if (mode == RelocInfo::INTERNAL_REFERENCE_ENCODED) {
DCHECK(IsLui(instr_at(pc)));
DCHECK(IsJ(instr_at(pc)));
set_target_internal_reference_encoded_at(pc, target);
} else {
DCHECK(mode == RelocInfo::INTERNAL_REFERENCE);
......@@ -270,18 +263,14 @@ Address RelocInfo::target_internal_reference() {
if (rmode_ == INTERNAL_REFERENCE) {
return Memory::Address_at(pc_);
} else {
// Encoded internal references are lui/ori load of 48-bit absolute address.
// Encoded internal references are j/jal instructions.
DCHECK(rmode_ == INTERNAL_REFERENCE_ENCODED);
Instr instr_lui = Assembler::instr_at(pc_ + 0 * Assembler::kInstrSize);
Instr instr_ori = Assembler::instr_at(pc_ + 1 * Assembler::kInstrSize);
Instr instr_ori2 = Assembler::instr_at(pc_ + 3 * Assembler::kInstrSize);
DCHECK(Assembler::IsLui(instr_lui));
DCHECK(Assembler::IsOri(instr_ori));
DCHECK(Assembler::IsOri(instr_ori2));
int64_t imm = (instr_lui & static_cast<int64_t>(kImm16Mask)) << 32;
imm |= (instr_ori & static_cast<int64_t>(kImm16Mask)) << 16;
imm |= (instr_ori2 & static_cast<int64_t>(kImm16Mask));
return reinterpret_cast<Address>(imm);
Instr instr = Assembler::instr_at(pc_ + 0 * Assembler::kInstrSize);
instr &= kImm26Mask;
uint64_t imm28 = instr << 2;
uint64_t segment =
(reinterpret_cast<uint64_t>(pc_) & ~static_cast<uint64_t>(kImm28Mask));
return reinterpret_cast<Address>(segment | imm28);
}
}
......
......@@ -637,7 +637,7 @@ int Assembler::target_at(int pos, bool is_internal) {
}
}
// Check we have a branch or jump instruction.
DCHECK(IsBranch(instr) || IsLui(instr));
DCHECK(IsBranch(instr) || IsJ(instr) || IsJal(instr) || IsLui(instr));
// Do NOT change this to <<2. We rely on arithmetic shifts here, assuming
// the compiler uses arithmetic shifts for signed integers.
if (IsBranch(instr)) {
......@@ -673,8 +673,18 @@ int Assembler::target_at(int pos, bool is_internal) {
return pos - delta;
}
} else {
UNREACHABLE();
return 0;
DCHECK(IsJ(instr) || IsJal(instr));
int32_t imm28 = (instr & static_cast<int32_t>(kImm26Mask)) << 2;
if (imm28 == kEndOfJumpChain) {
// EndOfChain sentinel is returned directly, not relative to pc or pos.
return kEndOfChain;
} else {
uint64_t instr_address = reinterpret_cast<int64_t>(buffer_ + pos);
instr_address &= kImm28Mask;
int delta = static_cast<int>(instr_address - imm28);
DCHECK(pos > delta);
return pos - delta;
}
}
}
......@@ -694,7 +704,7 @@ void Assembler::target_at_put(int pos, int target_pos, bool is_internal) {
return;
}
DCHECK(IsBranch(instr) || IsLui(instr));
DCHECK(IsBranch(instr) || IsJ(instr) || IsJal(instr) || IsLui(instr));
if (IsBranch(instr)) {
int32_t imm18 = target_pos - (pos + kBranchPCOffset);
DCHECK((imm18 & 3) == 0);
......@@ -725,7 +735,16 @@ void Assembler::target_at_put(int pos, int target_pos, bool is_internal) {
instr_at_put(pos + 3 * Assembler::kInstrSize,
instr_ori2 | (imm & kImm16Mask));
} else {
UNREACHABLE();
DCHECK(IsJ(instr) || IsJal(instr));
uint64_t imm28 = reinterpret_cast<uint64_t>(buffer_) + target_pos;
imm28 &= kImm28Mask;
DCHECK((imm28 & 3) == 0);
instr &= ~kImm26Mask;
uint32_t imm26 = imm28 >> 2;
DCHECK(is_uint26(imm26));
instr_at_put(pos, instr | (imm26 & kImm26Mask));
}
}
......@@ -787,7 +806,8 @@ void Assembler::bind_to(Label* L, int pos) {
}
target_at_put(fixup_pos, pos, false);
} else {
DCHECK(IsJ(instr) || IsLui(instr) || IsEmittedConstant(instr));
DCHECK(IsJ(instr) || IsJal(instr) || IsLui(instr) ||
IsEmittedConstant(instr));
target_at_put(fixup_pos, pos, false);
}
}
......@@ -984,7 +1004,6 @@ uint64_t Assembler::jump_address(Label* L) {
return kEndOfJumpChain;
}
}
uint64_t imm = reinterpret_cast<uint64_t>(buffer_) + target_pos;
DCHECK((imm & 3) == 0);
......@@ -1359,12 +1378,14 @@ void Assembler::bnezc(Register rs, int32_t offset) {
void Assembler::j(int64_t target) {
#if DEBUG
// Get pc of delay slot.
uint64_t ipc = reinterpret_cast<uint64_t>(pc_ + 1 * kInstrSize);
bool in_range = (ipc ^ static_cast<uint64_t>(target) >>
(kImm26Bits + kImmFieldShift)) == 0;
DCHECK(in_range && ((target & 3) == 0));
if (target != kEndOfJumpChain) {
uint64_t ipc = reinterpret_cast<uint64_t>(pc_ + 1 * kInstrSize);
bool in_range = ((ipc ^ static_cast<uint64_t>(target)) >>
(kImm26Bits + kImmFieldShift)) == 0;
DCHECK(in_range && ((target & 3) == 0));
}
#endif
GenInstrJump(J, target >> 2);
GenInstrJump(J, (target >> 2) & kImm26Mask);
}
......@@ -1385,13 +1406,15 @@ void Assembler::jr(Register rs) {
void Assembler::jal(int64_t target) {
#ifdef DEBUG
// Get pc of delay slot.
uint64_t ipc = reinterpret_cast<uint64_t>(pc_ + 1 * kInstrSize);
bool in_range = (ipc ^ static_cast<uint64_t>(target) >>
(kImm26Bits + kImmFieldShift)) == 0;
DCHECK(in_range && ((target & 3) == 0));
if (target != kEndOfJumpChain) {
uint64_t ipc = reinterpret_cast<uint64_t>(pc_ + 1 * kInstrSize);
bool in_range = ((ipc ^ static_cast<uint64_t>(target)) >>
(kImm26Bits + kImmFieldShift)) == 0;
DCHECK(in_range && ((target & 3) == 0));
}
#endif
positions_recorder()->WriteRecordedPositions();
GenInstrJump(JAL, target >> 2);
GenInstrJump(JAL, (target >> 2) & kImm26Mask);
}
......@@ -2811,6 +2834,7 @@ int Assembler::RelocateInternalReference(RelocInfo::Mode rmode, byte* pc,
}
Instr instr = instr_at(pc);
DCHECK(RelocInfo::IsInternalReferenceEncoded(rmode));
DCHECK(IsJ(instr) || IsLui(instr) || IsJal(instr));
if (IsLui(instr)) {
Instr instr_lui = instr_at(pc + 0 * Assembler::kInstrSize);
Instr instr_ori = instr_at(pc + 1 * Assembler::kInstrSize);
......@@ -2842,8 +2866,21 @@ int Assembler::RelocateInternalReference(RelocInfo::Mode rmode, byte* pc,
instr_ori2 | (imm & kImm16Mask));
return 4; // Number of instructions patched.
} else {
UNREACHABLE();
return 0; // Number of instructions patched.
uint32_t imm28 = (instr & static_cast<int32_t>(kImm26Mask)) << 2;
if (static_cast<int32_t>(imm28) == kEndOfJumpChain) {
return 0; // Number of instructions patched.
}
imm28 += pc_delta;
imm28 &= kImm28Mask;
DCHECK((imm28 & 3) == 0);
instr &= ~kImm26Mask;
uint32_t imm26 = imm28 >> 2;
DCHECK(is_uint26(imm26));
instr_at_put(pc, instr | (imm26 & kImm26Mask));
return 1; // Number of instructions patched.
}
}
......@@ -3012,14 +3049,8 @@ void Assembler::CheckTrampolinePool() {
// references until associated instructions are emitted and available
// to be patched.
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED);
// TODO(plind): Verify this, presume I cannot use macro-assembler
// here.
lui(at, (imm64 >> 32) & kImm16Mask);
ori(at, at, (imm64 >> 16) & kImm16Mask);
dsll(at, at, 16);
ori(at, at, imm64 & kImm16Mask);
j(imm64);
}
jr(at);
nop();
}
bind(&after_pool);
......
......@@ -521,6 +521,8 @@ class Assembler : public AssemblerBase {
// Return the code target address of the patch debug break slot
inline static Address break_address_from_return_address(Address pc);
static void JumpLabelToJumpRegister(Address pc);
static void QuietNaN(HeapObject* nan);
// This sets the branch destination (which gets loaded at the call address).
......@@ -1447,13 +1449,13 @@ class Assembler : public AssemblerBase {
int32_t get_trampoline_entry(int32_t pos);
int unbound_labels_count_;
// If trampoline is emitted, generated code is becoming large. As this is
// already a slow case which can possibly break our code generation for the
// extreme case, we use this information to trigger different mode of
// After trampoline is emitted, long branches are used in generated code for
// the forward branches whose target offsets could be beyond reach of branch
// instruction. We use this information to trigger different mode of
// branch instruction generation, where we use jump instructions rather
// than regular branch instructions.
bool trampoline_emitted_;
static const int kTrampolineSlotsSize = 6 * kInstrSize;
static const int kTrampolineSlotsSize = 2 * kInstrSize;
static const int kMaxBranchOffset = (1 << (18 - 1)) - 1;
static const int kInvalidSlotPos = -1;
......
......@@ -975,7 +975,7 @@ class Instruction {
return Bits(kImm21Shift + kImm21Bits - 1, kImm21Shift);
}
inline int32_t Imm26Value() const {
inline int64_t Imm26Value() const {
DCHECK(InstructionType() == kJumpType);
return Bits(kImm26Shift + kImm26Bits - 1, kImm26Shift);
}
......
......@@ -1652,7 +1652,7 @@ void MacroAssembler::BranchFCommon(SecondaryField sizeField, Label* target,
c(UN, D, cmp1, cmp2);
bc1f(&skip);
nop();
Jr(nan, bd);
J(nan, bd);
bind(&skip);
} else {
c(UN, D, cmp1, cmp2);
......@@ -1671,7 +1671,7 @@ void MacroAssembler::BranchFCommon(SecondaryField sizeField, Label* target,
cmp(UN, L, kDoubleCompareReg, cmp1, cmp2);
bc1eqz(&skip, kDoubleCompareReg);
nop();
Jr(nan, bd);
J(nan, bd);
bind(&skip);
} else {
cmp(UN, L, kDoubleCompareReg, cmp1, cmp2);
......@@ -1690,7 +1690,7 @@ void MacroAssembler::BranchFCommon(SecondaryField sizeField, Label* target,
Label skip;
Condition neg_cond = NegateFpuCondition(cond);
BranchShortF(sizeField, &skip, neg_cond, cmp1, cmp2, bd);
Jr(target, bd);
J(target, bd);
bind(&skip);
} else {
BranchShortF(sizeField, target, cond, cmp1, cmp2, bd);
......@@ -2126,11 +2126,11 @@ void MacroAssembler::Branch(Label* L, BranchDelaySlot bdslot) {
if (is_near(L)) {
BranchShort(L, bdslot);
} else {
Jr(L, bdslot);
J(L, bdslot);
}
} else {
if (is_trampoline_emitted()) {
Jr(L, bdslot);
J(L, bdslot);
} else {
BranchShort(L, bdslot);
}
......@@ -2149,10 +2149,10 @@ void MacroAssembler::Branch(Label* L, Condition cond, Register rs,
Label skip;
Condition neg_cond = NegateCondition(cond);
BranchShort(&skip, neg_cond, rs, rt);
Jr(L, bdslot);
J(L, bdslot);
bind(&skip);
} else {
Jr(L, bdslot);
J(L, bdslot);
}
}
} else {
......@@ -2161,10 +2161,10 @@ void MacroAssembler::Branch(Label* L, Condition cond, Register rs,
Label skip;
Condition neg_cond = NegateCondition(cond);
BranchShort(&skip, neg_cond, rs, rt);
Jr(L, bdslot);
J(L, bdslot);
bind(&skip);
} else {
Jr(L, bdslot);
J(L, bdslot);
}
} else {
BranchShort(L, cond, rs, rt, bdslot);
......@@ -2730,11 +2730,11 @@ void MacroAssembler::BranchAndLink(Label* L, BranchDelaySlot bdslot) {
if (is_near(L)) {
BranchAndLinkShort(L, bdslot);
} else {
Jalr(L, bdslot);
Jal(L, bdslot);
}
} else {
if (is_trampoline_emitted()) {
Jalr(L, bdslot);
Jal(L, bdslot);
} else {
BranchAndLinkShort(L, bdslot);
}
......@@ -2752,7 +2752,7 @@ void MacroAssembler::BranchAndLink(Label* L, Condition cond, Register rs,
Label skip;
Condition neg_cond = NegateCondition(cond);
BranchShort(&skip, neg_cond, rs, rt);
Jalr(L, bdslot);
J(L, bdslot);
bind(&skip);
}
} else {
......@@ -2760,7 +2760,7 @@ void MacroAssembler::BranchAndLink(Label* L, Condition cond, Register rs,
Label skip;
Condition neg_cond = NegateCondition(cond);
BranchShort(&skip, neg_cond, rs, rt);
Jalr(L, bdslot);
Jal(L, bdslot);
bind(&skip);
} else {
BranchAndLinkShort(L, cond, rs, rt, bdslot);
......@@ -3188,6 +3188,40 @@ void MacroAssembler::Ret(Condition cond,
}
void MacroAssembler::J(Label* L, BranchDelaySlot bdslot) {
  // Emit a long branch to label L using the pseudo-relative j instruction
  // (28-bit reach within the current 256MB-aligned region).
  BlockTrampolinePoolScope block_trampoline_pool(this);

  uint64_t imm = jump_address(L);
  {
    // Buffer growth (and relocation) must be blocked for internal references
    // until associated instructions are emitted and available to be patched.
    BlockGrowBufferScope block_buf_growth(this);
    RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED);
    j(imm);
  }
  // Emit a nop in the branch delay slot if required.
  if (bdslot == PROTECT) nop();
}
void MacroAssembler::Jal(Label* L, BranchDelaySlot bdslot) {
  // Emit a long branch-and-link to label L using the pseudo-relative jal
  // instruction (28-bit reach within the current 256MB-aligned region).
  BlockTrampolinePoolScope block_trampoline_pool(this);

  uint64_t imm = jump_address(L);
  {
    // Buffer growth (and relocation) must be blocked for internal references
    // until associated instructions are emitted and available to be patched.
    BlockGrowBufferScope block_buf_growth(this);
    RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE_ENCODED);
    jal(imm);
  }
  // Emit a nop in the branch delay slot if required.
  if (bdslot == PROTECT) nop();
}
void MacroAssembler::Jr(Label* L, BranchDelaySlot bdslot) {
BlockTrampolinePoolScope block_trampoline_pool(this);
......
......@@ -1702,6 +1702,8 @@ const Operand& rt = Operand(zero_reg), BranchDelaySlot bd = PROTECT
void BranchAndLinkShort(Label* L, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot = PROTECT);
void J(Label* L, BranchDelaySlot bdslot);
void Jal(Label* L, BranchDelaySlot bdslot);
void Jr(Label* L, BranchDelaySlot bdslot);
void Jalr(Label* L, BranchDelaySlot bdslot);
......
......@@ -4321,11 +4321,11 @@ void Simulator::DecodeTypeImmediate(Instruction* instr) {
// Type 3: instructions using a 26-bit immediate (e.g. j, jal).
void Simulator::DecodeTypeJump(Instruction* instr) {
// Get current pc.
int32_t current_pc = get_pc();
int64_t current_pc = get_pc();
// Get unchanged bits of pc.
int32_t pc_high_bits = current_pc & 0xf0000000;
int64_t pc_high_bits = current_pc & 0xfffffffff0000000;
// Next pc.
int32_t next_pc = pc_high_bits | (instr->Imm26Value() << 2);
int64_t next_pc = pc_high_bits | (instr->Imm26Value() << 2);
// Execute branch delay slot.
// We don't check for end_sim_pc. First it should not be met as the current pc
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment