Commit b6afa34a authored by sgjesse@chromium.org

MIPS: Long branch implementation and trampoline improvement.

Improve the branch and branch-trampoline mechanism to automatically
use long-jumps when function size grows large. Reduce size of emitted
trampoline pools.

Now passes mozilla regress-80981.js.

BUG=
TEST=

Review URL: http://codereview.chromium.org//7239020
Patch from Paul Lind <plind44@gmail.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8433 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 614e5428
......@@ -83,8 +83,12 @@ bool Operand::is_reg() const {
// RelocInfo.
void RelocInfo::apply(intptr_t delta) {
// On MIPS we do not use pc relative addressing, so we don't need to patch the
// code here.
if (IsInternalReference(rmode_)) {
// Absolute code pointer inside code object moves with the code object.
byte* p = reinterpret_cast<byte*>(pc_);
int count = Assembler::RelocateInternalReference(p, delta);
CPU::FlushICache(p, count * sizeof(uint32_t));
}
}
......@@ -300,7 +304,9 @@ void Assembler::CheckTrampolinePoolQuick() {
void Assembler::emit(Instr x) {
if (!is_buffer_growth_blocked()) {
CheckBuffer();
}
*reinterpret_cast<Instr*>(pc_) = x;
pc_ += kInstrSize;
CheckTrampolinePoolQuick();
......
This diff is collapsed.
......@@ -481,6 +481,9 @@ class Assembler : public AssemblerBase {
// Note: The same Label can be used for forward and backward branches
// but it may be bound only once.
void bind(Label* L); // Binds an unbound label L to current code position.
// Determines if Label is bound and near enough so that branch instruction
// can be used to reach it, instead of jump instruction.
bool is_near(Label* L);
// Returns the branch offset to the given label from the current code
// position. Links the label to the current position if it is still unbound.
......@@ -491,6 +494,7 @@ class Assembler : public AssemblerBase {
ASSERT((o & 3) == 0); // Assert the offset is aligned.
return o >> 2;
}
uint32_t jump_address(Label* L);
// Puts a label's target address at the given position.
// The high 8 bits are set to zero.
......@@ -795,6 +799,25 @@ class Assembler : public AssemblerBase {
DISALLOW_IMPLICIT_CONSTRUCTORS(BlockTrampolinePoolScope);
};
// Class for postponing the assembly buffer growth. Typically used for
// sequences of instructions that must be emitted as a unit, before
// buffer growth (and relocation) can occur.
// This blocking scope is not nestable.
class BlockGrowBufferScope {
 public:
  // Starts blocking buffer growth on |assem|; the block lasts until this
  // scope object is destroyed.
  explicit BlockGrowBufferScope(Assembler* assem) : assem_(assem) {
    assem->StartBlockGrowBuffer();
  }

  // Ends the block on the assembler captured at construction.
  ~BlockGrowBufferScope() { assem_->EndBlockGrowBuffer(); }

 private:
  Assembler* assem_;  // Assembler whose buffer growth is being blocked.

  DISALLOW_IMPLICIT_CONSTRUCTORS(BlockGrowBufferScope);
};
// Debugging.
// Mark address of the ExitJSFrame code.
......@@ -811,6 +834,8 @@ class Assembler : public AssemblerBase {
// Use --code-comments to enable.
void RecordComment(const char* msg);
static int RelocateInternalReference(byte* pc, intptr_t pc_delta);
// Writes a single byte or word of data in the code stream. Used for
// inline tables, e.g., jump-tables.
void db(uint8_t data);
......@@ -847,6 +872,11 @@ class Assembler : public AssemblerBase {
static bool IsBeq(Instr instr);
static bool IsBne(Instr instr);
static bool IsJump(Instr instr);
static bool IsJ(Instr instr);
static bool IsLui(Instr instr);
static bool IsOri(Instr instr);
static bool IsNop(Instr instr, unsigned int type);
static bool IsPop(Instr instr);
static bool IsPush(Instr instr);
......@@ -868,6 +898,8 @@ class Assembler : public AssemblerBase {
static uint32_t GetSa(Instr instr);
static uint32_t GetSaField(Instr instr);
static uint32_t GetOpcodeField(Instr instr);
static uint32_t GetFunction(Instr instr);
static uint32_t GetFunctionField(Instr instr);
static uint32_t GetImmediate16(Instr instr);
static uint32_t GetLabelConst(Instr instr);
......@@ -883,7 +915,7 @@ class Assembler : public AssemblerBase {
static bool IsAndImmediate(Instr instr);
void CheckTrampolinePool(bool force_emit = false);
void CheckTrampolinePool();
protected:
// Relocation for a type-recording IC has the AST id added to it. This
......@@ -916,6 +948,7 @@ class Assembler : public AssemblerBase {
// Raises the trampoline-pool blocking nesting level by one.
// EndBlockTrampolinePool() lowers it again.
void StartBlockTrampolinePool() {
  trampoline_pool_blocked_nesting_ += 1;
}
// Lowers the trampoline-pool blocking nesting level by one, undoing one
// StartBlockTrampolinePool() call.
void EndBlockTrampolinePool() {
  trampoline_pool_blocked_nesting_ -= 1;
}
......@@ -928,6 +961,25 @@ class Assembler : public AssemblerBase {
return internal_trampoline_exception_;
}
// Returns the trampoline_emitted_ flag. MacroAssembler consults it (via
// UseAbsoluteCodePointers()) when choosing between short branches and
// jump sequences.
bool is_trampoline_emitted() const {
return trampoline_emitted_;
}
// Temporarily block automatic assembly buffer growth.
void StartBlockGrowBuffer() {
ASSERT(!block_buffer_growth_);
block_buffer_growth_ = true;
}
void EndBlockGrowBuffer() {
ASSERT(block_buffer_growth_);
block_buffer_growth_ = false;
}
// Returns true while automatic buffer growth is blocked, i.e. between
// StartBlockGrowBuffer() and EndBlockGrowBuffer().
bool is_buffer_growth_blocked() const {
return block_buffer_growth_;
}
private:
// Code buffer:
// The buffer into which code and relocation info are generated.
......@@ -964,6 +1016,9 @@ class Assembler : public AssemblerBase {
// Keep track of the last emitted pool to guarantee a maximal distance.
int last_trampoline_pool_end_; // pc offset of the end of the last pool.
// Automatic growth of the assembly buffer may be blocked for some sequences.
bool block_buffer_growth_; // Block growth when true.
// Relocation information generation.
// Each relocation is encoded as a variable size value.
static const int kMaxRelocSize = RelocInfoWriter::kMaxSize;
......@@ -1044,7 +1099,6 @@ class Assembler : public AssemblerBase {
// Labels.
void print(Label* L);
void bind_to(Label* L, int pos);
void link_to(Label* L, Label* appendix);
void next(Label* L);
// One trampoline consists of:
......@@ -1057,13 +1111,17 @@ class Assembler : public AssemblerBase {
// label_count * kInstrSize.
class Trampoline {
public:
Trampoline(int start, int slot_count, int label_count) {
Trampoline() {
start_ = 0;
next_slot_ = 0;
free_slot_count_ = 0;
end_ = 0;
}
Trampoline(int start, int slot_count) {
start_ = start;
next_slot_ = start;
free_slot_count_ = slot_count;
next_label_ = start + slot_count * 2 * kInstrSize;
free_label_count_ = label_count;
end_ = next_label_ + (label_count - 1) * kInstrSize;
end_ = start + slot_count * kTrampolineSlotsSize;
}
int start() {
return start_;
......@@ -1082,41 +1140,30 @@ class Assembler : public AssemblerBase {
} else {
trampoline_slot = next_slot_;
free_slot_count_--;
next_slot_ += 2*kInstrSize;
next_slot_ += kTrampolineSlotsSize;
}
return trampoline_slot;
}
int take_label() {
int label_pos = next_label_;
ASSERT(free_label_count_ > 0);
free_label_count_--;
next_label_ += kInstrSize;
return label_pos;
}
private:
int start_;
int end_;
int next_slot_;
int free_slot_count_;
int next_label_;
int free_label_count_;
};
int32_t get_label_entry(int32_t pos, bool next_pool = true);
int32_t get_trampoline_entry(int32_t pos, bool next_pool = true);
static const int kSlotsPerTrampoline = 2304;
static const int kLabelsPerTrampoline = 8;
static const int kTrampolineInst =
2 * kSlotsPerTrampoline + kLabelsPerTrampoline;
static const int kTrampolineSize = kTrampolineInst * kInstrSize;
int32_t get_trampoline_entry(int32_t pos);
int unbound_labels_count_;
// If a trampoline has been emitted, the generated code is becoming large.
// As this is already a slow case which can possibly break our code
// generation in the extreme case, we use this information to trigger a
// different mode of branch instruction generation, where we use jump
// instructions rather than regular branch instructions.
bool trampoline_emitted_;
static const int kTrampolineSlotsSize = 4 * kInstrSize;
static const int kMaxBranchOffset = (1 << (18 - 1)) - 1;
static const int kMaxDistBetweenPools =
kMaxBranchOffset - 2 * kTrampolineSize;
static const int kInvalidSlotPos = -1;
List<Trampoline> trampolines_;
Trampoline trampoline_;
bool internal_trampoline_exception_;
friend class RegExpMacroAssemblerMIPS;
......
......@@ -344,7 +344,7 @@ static void ArrayNativeCode(MacroAssembler* masm,
// Handle construction of an empty array of a certain size. Bail out if size
// is too large to actually allocate an elements array.
ASSERT(kSmiTag == 0);
__ Branch(call_generic_code, ge, a2,
__ Branch(call_generic_code, Ugreater_equal, a2,
Operand(JSObject::kInitialMaxFastElementArray << kSmiTagSize));
// a0: argc
......
......@@ -201,6 +201,8 @@ static const int kImm16Shift = 0;
static const int kImm16Bits = 16;
static const int kImm26Shift = 0;
static const int kImm26Bits = 26;
static const int kImm28Shift = 0;
static const int kImm28Bits = 28;
static const int kFsShift = 11;
static const int kFsBits = 5;
......@@ -220,6 +222,7 @@ static const int kFBtrueBits = 1;
static const int kOpcodeMask = ((1 << kOpcodeBits) - 1) << kOpcodeShift;
static const int kImm16Mask = ((1 << kImm16Bits) - 1) << kImm16Shift;
static const int kImm26Mask = ((1 << kImm26Bits) - 1) << kImm26Shift;
static const int kImm28Mask = ((1 << kImm28Bits) - 1) << kImm28Shift;
static const int kRsFieldMask = ((1 << kRsBits) - 1) << kRsShift;
static const int kRtFieldMask = ((1 << kRtBits) - 1) << kRtShift;
static const int kRdFieldMask = ((1 << kRdBits) - 1) << kRdShift;
......
......@@ -1116,7 +1116,54 @@ void MacroAssembler::GetLeastBitsFromInt32(Register dst,
(cond != cc_always && (!rs.is(zero_reg) || !rt.rm().is(zero_reg))))
// Absolute code pointers (jump sequences) are used exactly when a trampoline
// has already been emitted.
bool MacroAssembler::UseAbsoluteCodePointers() {
  return is_trampoline_emitted();
}
// Unconditional branch by an explicit 16-bit offset. This overload always
// forwards to BranchShort().
void MacroAssembler::Branch(int16_t offset, BranchDelaySlot bdslot) {
BranchShort(offset, bdslot);
}
// Conditional branch by an explicit 16-bit offset, comparing rs against rt
// under cond. This overload always forwards to BranchShort().
void MacroAssembler::Branch(int16_t offset, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot) {
BranchShort(offset, cond, rs, rt, bdslot);
}
// Unconditional branch to label L. Uses a short branch unless absolute code
// pointers are in use and L is not near, in which case a register jump
// sequence (Jr) is emitted instead.
void MacroAssembler::Branch(Label* L, BranchDelaySlot bdslot) {
  const bool label_is_near = is_near(L);
  if (!UseAbsoluteCodePointers() || label_is_near) {
    BranchShort(L, bdslot);
    return;
  }
  Jr(L, bdslot);
}
// Conditional branch to label L, comparing rs against rt under cond. Uses a
// short branch unless absolute code pointers are in use and L is not near.
void MacroAssembler::Branch(Label* L, Condition cond, Register rs,
                            const Operand& rt,
                            BranchDelaySlot bdslot) {
  const bool label_is_near = is_near(L);
  if (!UseAbsoluteCodePointers() || label_is_near) {
    BranchShort(L, cond, rs, rt, bdslot);
    return;
  }

  // Long form: a short branch on the negated condition hops over an
  // unconditional register jump to L.
  Label over_jump;
  BranchShort(&over_jump, NegateCondition(cond), rs, rt);
  Jr(L, bdslot);
  bind(&over_jump);
}
void MacroAssembler::BranchShort(int16_t offset, BranchDelaySlot bdslot) {
b(offset);
// Emit a nop in the branch delay slot if required.
......@@ -1125,7 +1172,7 @@ void MacroAssembler::Branch(int16_t offset, BranchDelaySlot bdslot) {
}
void MacroAssembler::Branch(int16_t offset, Condition cond, Register rs,
void MacroAssembler::BranchShort(int16_t offset, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot) {
BRANCH_ARGS_CHECK(cond, rs, rt);
......@@ -1199,7 +1246,8 @@ void MacroAssembler::Branch(int16_t offset, Condition cond, Register rs,
break;
case Uless:
if (r2.is(zero_reg)) {
b(offset);
// No code needs to be emitted.
return;
} else {
sltu(scratch, rs, r2);
bne(scratch, zero_reg, offset);
......@@ -1258,7 +1306,7 @@ void MacroAssembler::Branch(int16_t offset, Condition cond, Register rs,
} else {
r2 = scratch;
li(r2, rt);
sltu(scratch, rs, r2);
slt(scratch, rs, r2);
beq(scratch, zero_reg, offset);
}
break;
......@@ -1311,7 +1359,8 @@ void MacroAssembler::Branch(int16_t offset, Condition cond, Register rs,
break;
case Uless:
if (rt.imm32_ == 0) {
b(offset);
// No code needs to be emitted.
return;
} else if (is_int16(rt.imm32_)) {
sltiu(scratch, rs, rt.imm32_);
bne(scratch, zero_reg, offset);
......@@ -1342,7 +1391,7 @@ void MacroAssembler::Branch(int16_t offset, Condition cond, Register rs,
}
void MacroAssembler::Branch(Label* L, BranchDelaySlot bdslot) {
void MacroAssembler::BranchShort(Label* L, BranchDelaySlot bdslot) {
// We use branch_offset as an argument for the branch instructions to be sure
// it is called just before generating the branch instruction, as needed.
......@@ -1354,7 +1403,7 @@ void MacroAssembler::Branch(Label* L, BranchDelaySlot bdslot) {
}
void MacroAssembler::Branch(Label* L, Condition cond, Register rs,
void MacroAssembler::BranchShort(Label* L, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot) {
BRANCH_ARGS_CHECK(cond, rs, rt);
......@@ -1444,8 +1493,8 @@ void MacroAssembler::Branch(Label* L, Condition cond, Register rs,
break;
case Uless:
if (r2.is(zero_reg)) {
offset = shifted_branch_offset(L, false);
b(offset);
// No code needs to be emitted.
return;
} else {
sltu(scratch, rs, r2);
offset = shifted_branch_offset(L, false);
......@@ -1510,7 +1559,7 @@ void MacroAssembler::Branch(Label* L, Condition cond, Register rs,
} else {
r2 = scratch;
li(r2, rt);
sltu(scratch, rs, r2);
slt(scratch, rs, r2);
offset = shifted_branch_offset(L, false);
beq(scratch, zero_reg, offset);
}
......@@ -1574,8 +1623,8 @@ void MacroAssembler::Branch(Label* L, Condition cond, Register rs,
break;
case Uless:
if (rt.imm32_ == 0) {
offset = shifted_branch_offset(L, false);
b(offset);
// No code needs to be emitted.
return;
} else if (is_int16(rt.imm32_)) {
sltiu(scratch, rs, rt.imm32_);
offset = shifted_branch_offset(L, false);
......@@ -1612,10 +1661,48 @@ void MacroAssembler::Branch(Label* L, Condition cond, Register rs,
}
// Unconditional branch-and-link by an explicit 16-bit offset. This overload
// always forwards to BranchAndLinkShort().
void MacroAssembler::BranchAndLink(int16_t offset, BranchDelaySlot bdslot) {
BranchAndLinkShort(offset, bdslot);
}
// Conditional branch-and-link by an explicit 16-bit offset, comparing rs
// against rt under cond. This overload always forwards to
// BranchAndLinkShort().
void MacroAssembler::BranchAndLink(int16_t offset, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot) {
BranchAndLinkShort(offset, cond, rs, rt, bdslot);
}
// Unconditional branch-and-link to label L. Uses the short form unless
// absolute code pointers are in use and L is not near, in which case a
// register jump-and-link sequence (Jalr) is emitted instead.
void MacroAssembler::BranchAndLink(Label* L, BranchDelaySlot bdslot) {
  const bool label_is_near = is_near(L);
  if (!UseAbsoluteCodePointers() || label_is_near) {
    BranchAndLinkShort(L, bdslot);
    return;
  }
  Jalr(L, bdslot);
}
// Conditional branch-and-link to label L, comparing rs against rt under
// cond. Uses the short form unless absolute code pointers are in use and L
// is not near.
void MacroAssembler::BranchAndLink(Label* L, Condition cond, Register rs,
                                   const Operand& rt,
                                   BranchDelaySlot bdslot) {
  const bool label_is_near = is_near(L);
  if (!UseAbsoluteCodePointers() || label_is_near) {
    BranchAndLinkShort(L, cond, rs, rt, bdslot);
    return;
  }

  // Long form: a short branch on the negated condition hops over an
  // unconditional register jump-and-link to L.
  Label over_call;
  BranchShort(&over_call, NegateCondition(cond), rs, rt);
  Jalr(L, bdslot);
  bind(&over_call);
}
// We need to use a bgezal or bltzal, but they can't be used directly with the
// slt instructions. We could use sub or add instead but we would miss overflow
// cases, so we keep slt and add an intermediate third instruction.
void MacroAssembler::BranchAndLink(int16_t offset,
void MacroAssembler::BranchAndLinkShort(int16_t offset,
BranchDelaySlot bdslot) {
bal(offset);
......@@ -1625,8 +1712,8 @@ void MacroAssembler::BranchAndLink(int16_t offset,
}
void MacroAssembler::BranchAndLink(int16_t offset, Condition cond, Register rs,
const Operand& rt,
void MacroAssembler::BranchAndLinkShort(int16_t offset, Condition cond,
Register rs, const Operand& rt,
BranchDelaySlot bdslot) {
BRANCH_ARGS_CHECK(cond, rs, rt);
Register r2 = no_reg;
......@@ -1707,7 +1794,7 @@ void MacroAssembler::BranchAndLink(int16_t offset, Condition cond, Register rs,
}
void MacroAssembler::BranchAndLink(Label* L, BranchDelaySlot bdslot) {
void MacroAssembler::BranchAndLinkShort(Label* L, BranchDelaySlot bdslot) {
bal(shifted_branch_offset(L, false));
// Emit a nop in the branch delay slot if required.
......@@ -1716,7 +1803,7 @@ void MacroAssembler::BranchAndLink(Label* L, BranchDelaySlot bdslot) {
}
void MacroAssembler::BranchAndLink(Label* L, Condition cond, Register rs,
void MacroAssembler::BranchAndLinkShort(Label* L, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot) {
BRANCH_ARGS_CHECK(cond, rs, rt);
......@@ -1814,6 +1901,64 @@ void MacroAssembler::BranchAndLink(Label* L, Condition cond, Register rs,
}
// Emits a j instruction to label L, recorded as an internal reference.
// Only the kImm28Mask bits of the label's jump address are passed to j().
void MacroAssembler::J(Label* L, BranchDelaySlot bdslot) {
  BlockTrampolinePoolScope block_trampoline_pool(this);
  const uint32_t imm28 = jump_address(L) & kImm28Mask;
  {
    // Buffer growth (and relocation) stays blocked from the moment the
    // internal reference is recorded until the instruction it refers to has
    // been emitted and is available to be patched.
    BlockGrowBufferScope block_buf_growth(this);
    RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
    j(imm28);
  }
  // Fill the branch delay slot with a nop when the caller asks for it.
  if (bdslot == PROTECT) {
    nop();
  }
}
// Jumps to label L through register at: a lui/ori pair builds the label's
// jump address in at, then jr transfers control. The lui/ori pair is
// recorded as an internal reference.
void MacroAssembler::Jr(Label* L, BranchDelaySlot bdslot) {
  BlockTrampolinePoolScope block_trampoline_pool(this);
  const uint32_t imm32 = jump_address(L);
  {
    // Buffer growth (and relocation) stays blocked from the moment the
    // internal reference is recorded until both address-building
    // instructions have been emitted and are available to be patched.
    BlockGrowBufferScope block_buf_growth(this);
    RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
    // lui takes the kHiMask part shifted down by kLuiShift; ori merges in
    // the low 16 bits.
    lui(at, (imm32 & kHiMask) >> kLuiShift);
    ori(at, at, (imm32 & kImm16Mask));
  }
  jr(at);
  // Fill the branch delay slot with a nop when the caller asks for it.
  if (bdslot == PROTECT) {
    nop();
  }
}
// Jump-and-link to label L through register at: a lui/ori pair builds the
// label's jump address in at, then jalr transfers control. The lui/ori pair
// is recorded as an internal reference.
void MacroAssembler::Jalr(Label* L, BranchDelaySlot bdslot) {
  BlockTrampolinePoolScope block_trampoline_pool(this);
  const uint32_t imm32 = jump_address(L);
  {
    // Buffer growth (and relocation) stays blocked from the moment the
    // internal reference is recorded until both address-building
    // instructions have been emitted and are available to be patched.
    BlockGrowBufferScope block_buf_growth(this);
    RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
    // lui takes the kHiMask part shifted down by kLuiShift; ori merges in
    // the low 16 bits.
    lui(at, (imm32 & kHiMask) >> kLuiShift);
    ori(at, at, (imm32 & kImm16Mask));
  }
  jalr(at);
  // Fill the branch delay slot with a nop when the caller asks for it.
  if (bdslot == PROTECT) {
    nop();
  }
}
void MacroAssembler::Jump(const Operand& target, BranchDelaySlot bdslot) {
BlockTrampolinePoolScope block_trampoline_pool(this);
if (target.is_reg()) {
......
......@@ -1105,6 +1105,26 @@ DECLARE_NOTARGET_PROTOTYPE(Ret)
Register scratch,
int num_arguments);
void BranchShort(int16_t offset, BranchDelaySlot bdslot = PROTECT);
void BranchShort(int16_t offset, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot = PROTECT);
void BranchShort(Label* L, BranchDelaySlot bdslot = PROTECT);
void BranchShort(Label* L, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot = PROTECT);
void BranchAndLinkShort(int16_t offset, BranchDelaySlot bdslot = PROTECT);
void BranchAndLinkShort(int16_t offset, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot = PROTECT);
void BranchAndLinkShort(Label* L, BranchDelaySlot bdslot = PROTECT);
void BranchAndLinkShort(Label* L, Condition cond, Register rs,
const Operand& rt,
BranchDelaySlot bdslot = PROTECT);
void J(Label* L, BranchDelaySlot bdslot);
void Jr(Label* L, BranchDelaySlot bdslot);
void Jalr(Label* L, BranchDelaySlot bdslot);
void Jump(intptr_t target, RelocInfo::Mode rmode,
BranchDelaySlot bd = PROTECT);
void Jump(intptr_t target, RelocInfo::Mode rmode, Condition cond = cc_always,
......@@ -1145,6 +1165,8 @@ DECLARE_NOTARGET_PROTOTYPE(Ret)
MemOperand SafepointRegisterSlot(Register reg);
MemOperand SafepointRegistersAndDoublesSlot(Register reg);
bool UseAbsoluteCodePointers();
bool generating_stub_;
bool allow_stub_calls_;
// This handle will be patched with the code object on installation.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment