Commit b49dd13c authored by danno@chromium.org

MIPS: Branch delay slot and other optimizations.

List of changes:
-added a minor optimization to the Simulator that quickly skips nops in the delay slot
-slightly re-worked CEntryStub to save a few instructions
 CEntryStub now expects the following values:
  -s0: number of arguments including receiver
  -s1: size of arguments excluding receiver
  -s2: pointer to builtin function
 Two new MacroAssembler functions were added to make usage more convenient:
  -PrepareCEntryArgs(int num_args) to set up s0 and s1
  -PrepareCEntryFunction(const ExternalReference&) to set up s2
-removed branch delay slot nops from the most frequently used code areas
-reorganized some code to execute fewer instructions
-utilized the delay slot of most Ret instructions
 This does not cover all Rets, only the most obvious cases.
 Also added a special version of DropAndRet that utilizes the delay slot.
-added some comments to code areas where explanation of the register/delay slot usage may be needed
-added an optimization to Jump so it doesn't always pre-load the target register

BUG=
TEST=

Review URL: https://chromiumcodereview.appspot.com/9699071
Patch from Daniel Kalmar <kalmard@homejinni.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11099 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 790219ec
......@@ -67,9 +67,11 @@ void Builtins::Generate_Adaptor(MacroAssembler* masm,
ASSERT(extra_args == NO_EXTRA_ARGUMENTS);
}
// JumpToExternalReference expects a0 to contain the number of arguments
// JumpToExternalReference expects s0 to contain the number of arguments
// including the receiver and the extra arguments.
__ Addu(a0, a0, Operand(num_extra_args + 1));
__ Addu(s0, a0, num_extra_args + 1);
__ sll(s1, s0, kPointerSizeLog2);
__ Subu(s1, s1, kPointerSize);
__ JumpToExternalReference(ExternalReference(id, masm->isolate()));
}
......
This diff is collapsed.
......@@ -152,8 +152,8 @@ static void Generate_DebugBreakCallHelper(MacroAssembler* masm,
#ifdef DEBUG
__ RecordComment("// Calling from debug break to runtime - come in - over");
#endif
__ mov(a0, zero_reg); // No arguments.
__ li(a1, Operand(ExternalReference::debug_break(masm->isolate())));
__ PrepareCEntryArgs(0); // No arguments.
__ PrepareCEntryFunction(ExternalReference::debug_break(masm->isolate()));
CEntryStub ceb(1);
__ CallStub(&ceb);
......
......@@ -512,8 +512,8 @@ void CallICBase::GenerateMiss(MacroAssembler* masm,
__ Push(a3, a2);
// Call the entry.
__ li(a0, Operand(2));
__ li(a1, Operand(ExternalReference(IC_Utility(id), isolate)));
__ PrepareCEntryArgs(2);
__ PrepareCEntryFunction(ExternalReference(IC_Utility(id), isolate));
CEntryStub stub(1);
__ CallStub(&stub);
......@@ -844,8 +844,8 @@ void KeyedLoadIC::GenerateNonStrictArguments(MacroAssembler* masm) {
Label slow, notin;
MemOperand mapped_location =
GenerateMappedArgumentsLookup(masm, a1, a0, a2, a3, t0, &notin, &slow);
__ Ret(USE_DELAY_SLOT);
__ lw(v0, mapped_location);
__ Ret();
__ bind(&notin);
// The unmapped lookup expects that the parameter map is in a2.
MemOperand unmapped_location =
......@@ -853,8 +853,8 @@ void KeyedLoadIC::GenerateNonStrictArguments(MacroAssembler* masm) {
__ lw(a2, unmapped_location);
__ LoadRoot(a3, Heap::kTheHoleValueRootIndex);
__ Branch(&slow, eq, a2, Operand(a3));
__ Ret(USE_DELAY_SLOT);
__ mov(v0, a2);
__ Ret();
__ bind(&slow);
GenerateMiss(masm, false);
}
......
......@@ -634,13 +634,9 @@ void LCodeGen::DeoptimizeIf(Condition cc,
__ bind(&skip);
}
if (cc == al) {
__ Jump(entry, RelocInfo::RUNTIME_ENTRY);
} else {
// TODO(plind): The Arm port is a little different here, due to their
// DeOpt jump table, which is not used for Mips yet.
__ Jump(entry, RelocInfo::RUNTIME_ENTRY, cc, src1, src2);
}
// TODO(plind): The Arm port is a little different here, due to their
// DeOpt jump table, which is not used for Mips yet.
__ Jump(entry, RelocInfo::RUNTIME_ENTRY, cc, src1, src2);
}
......
......@@ -2438,8 +2438,15 @@ void MacroAssembler::Jump(intptr_t target,
Register rs,
const Operand& rt,
BranchDelaySlot bd) {
Label skip;
if (cond != cc_always) {
Branch(USE_DELAY_SLOT, &skip, NegateCondition(cond), rs, rt);
}
// The first instruction of 'li' may be placed in the delay slot.
// This is not an issue, t9 is expected to be clobbered anyway.
li(t9, Operand(target, rmode));
Jump(t9, cond, rs, rt, bd);
Jump(t9, al, zero_reg, Operand(zero_reg), bd);
bind(&skip);
}
......@@ -2569,7 +2576,7 @@ void MacroAssembler::Call(Handle<Code> code,
rmode = RelocInfo::CODE_TARGET_WITH_ID;
}
Call(reinterpret_cast<Address>(code.location()), rmode, cond, rs, rt, bd);
ASSERT_EQ(CallSize(code, rmode, ast_id, cond, rs, rt),
ASSERT_EQ(CallSize(code, rmode, ast_id, cond, rs, rt, bd),
SizeOfCodeGeneratedSince(&start));
}
......@@ -2639,14 +2646,16 @@ void MacroAssembler::Jalr(Label* L, BranchDelaySlot bdslot) {
nop();
}
// Unconditional DropAndRet: emits a return followed by a single stack
// pointer adjustment of drop * kPointerSize bytes.
void MacroAssembler::DropAndRet(int drop) {
// Emit the return first and leave its branch delay slot open.
Ret(USE_DELAY_SLOT);
// This instruction fills the delay slot of the return above, so sp is
// adjusted as part of returning.
// NOTE(review): addiu takes an immediate operand; presumably
// drop * kPointerSize must fit its range - confirm against callers.
addiu(sp, sp, drop * kPointerSize);
}
void MacroAssembler::DropAndRet(int drop,
Condition cond,
Register r1,
const Operand& r2) {
// This is a workaround to make sure only one branch instruction is
// generated. It relies on Drop and Ret not creating branches if
// cond == cc_always.
// Both Drop and Ret need to be conditional.
Label skip;
if (cond != cc_always) {
Branch(&skip, NegateCondition(cond), r1, r2);
......@@ -2713,8 +2722,8 @@ void MacroAssembler::Push(Handle<Object> handle) {
#ifdef ENABLE_DEBUGGER_SUPPORT
void MacroAssembler::DebugBreak() {
mov(a0, zero_reg);
li(a1, Operand(ExternalReference(Runtime::kDebugBreak, isolate())));
PrepareCEntryArgs(0);
PrepareCEntryFunction(ExternalReference(Runtime::kDebugBreak, isolate()));
CEntryStub ces(1);
ASSERT(AllowThisStubCall(&ces));
Call(ces.GetCode(), RelocInfo::DEBUG_BREAK);
......@@ -3876,10 +3885,13 @@ void MacroAssembler::GetObjectType(Register object,
// -----------------------------------------------------------------------------
// Runtime calls.
void MacroAssembler::CallStub(CodeStub* stub, Condition cond,
Register r1, const Operand& r2) {
void MacroAssembler::CallStub(CodeStub* stub,
Condition cond,
Register r1,
const Operand& r2,
BranchDelaySlot bd) {
ASSERT(AllowThisStubCall(stub)); // Stub calls are not allowed in some stubs.
Call(stub->GetCode(), RelocInfo::CODE_TARGET, kNoASTId, cond, r1, r2);
Call(stub->GetCode(), RelocInfo::CODE_TARGET, kNoASTId, cond, r1, r2, bd);
}
......@@ -3962,8 +3974,7 @@ void MacroAssembler::CallApiFunctionAndReturn(ExternalReference function,
lw(t1, MemOperand(at));
Branch(&promote_scheduled_exception, ne, t0, Operand(t1));
li(s0, Operand(stack_space));
LeaveExitFrame(false, s0);
Ret();
LeaveExitFrame(false, s0, true);
bind(&promote_scheduled_exception);
TailCallExternalReference(
......@@ -4161,8 +4172,8 @@ void MacroAssembler::CallRuntime(const Runtime::Function* f,
// arguments passed in because it is constant. At some point we
// should remove this need and make the runtime routine entry code
// smarter.
li(a0, num_arguments);
li(a1, Operand(ExternalReference(f, isolate())));
PrepareCEntryArgs(num_arguments);
PrepareCEntryFunction(ExternalReference(f, isolate()));
CEntryStub stub(1);
CallStub(&stub);
}
......@@ -4170,8 +4181,8 @@ void MacroAssembler::CallRuntime(const Runtime::Function* f,
void MacroAssembler::CallRuntimeSaveDoubles(Runtime::FunctionId id) {
const Runtime::Function* function = Runtime::FunctionForId(id);
li(a0, Operand(function->nargs));
li(a1, Operand(ExternalReference(function, isolate())));
PrepareCEntryArgs(function->nargs);
PrepareCEntryFunction(ExternalReference(function, isolate()));
CEntryStub stub(1, kSaveFPRegs);
CallStub(&stub);
}
......@@ -4183,12 +4194,13 @@ void MacroAssembler::CallRuntime(Runtime::FunctionId fid, int num_arguments) {
void MacroAssembler::CallExternalReference(const ExternalReference& ext,
int num_arguments) {
li(a0, Operand(num_arguments));
li(a1, Operand(ext));
int num_arguments,
BranchDelaySlot bd) {
PrepareCEntryArgs(num_arguments);
PrepareCEntryFunction(ext);
CEntryStub stub(1);
CallStub(&stub);
CallStub(&stub, al, zero_reg, Operand(zero_reg), bd);
}
......@@ -4199,7 +4211,7 @@ void MacroAssembler::TailCallExternalReference(const ExternalReference& ext,
// arguments passed in because it is constant. At some point we
// should remove this need and make the runtime routine entry code
// smarter.
li(a0, Operand(num_arguments));
PrepareCEntryArgs(num_arguments);
JumpToExternalReference(ext);
}
......@@ -4213,10 +4225,16 @@ void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid,
}
void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin) {
li(a1, Operand(builtin));
void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin,
BranchDelaySlot bd) {
PrepareCEntryFunction(builtin);
CEntryStub stub(1);
Jump(stub.GetCode(), RelocInfo::CODE_TARGET);
Jump(stub.GetCode(),
RelocInfo::CODE_TARGET,
al,
zero_reg,
Operand(zero_reg),
bd);
}
......@@ -4563,7 +4581,8 @@ void MacroAssembler::EnterExitFrame(bool save_doubles,
void MacroAssembler::LeaveExitFrame(bool save_doubles,
Register argument_count) {
Register argument_count,
bool do_return) {
// Optionally restore all double registers.
if (save_doubles) {
// Remember: we only need to restore every 2nd double FPU value.
......@@ -4589,11 +4608,17 @@ void MacroAssembler::LeaveExitFrame(bool save_doubles,
mov(sp, fp); // Respect ABI stack constraint.
lw(fp, MemOperand(sp, ExitFrameConstants::kCallerFPOffset));
lw(ra, MemOperand(sp, ExitFrameConstants::kCallerPCOffset));
addiu(sp, sp, 8);
if (argument_count.is_valid()) {
sll(t8, argument_count, kPointerSizeLog2);
addu(sp, sp, t8);
}
if (do_return) {
Ret(USE_DELAY_SLOT);
// If returning, the instruction in the delay slot will be the addiu below.
}
addiu(sp, sp, 8);
}
......
......@@ -193,10 +193,14 @@ class MacroAssembler: public Assembler {
Register reg = no_reg,
const Operand& op = Operand(no_reg));
void DropAndRet(int drop = 0,
Condition cond = cc_always,
Register reg = no_reg,
const Operand& op = Operand(no_reg));
// Trivial case of DropAndRet that utilizes the delay slot and only emits
// 2 instructions.
void DropAndRet(int drop);
void DropAndRet(int drop,
Condition cond,
Register reg,
const Operand& op);
// Swap two registers. If the scratch register is omitted then a slightly
// less efficient form using xor instead of mov is emitted.
......@@ -773,7 +777,9 @@ class MacroAssembler: public Assembler {
int stack_space = 0);
// Leave the current exit frame.
void LeaveExitFrame(bool save_doubles, Register arg_count);
void LeaveExitFrame(bool save_doubles,
Register arg_count,
bool do_return = false);
// Get the actual activation frame alignment for target environment.
static int ActivationFrameAlignment();
......@@ -1084,9 +1090,22 @@ class MacroAssembler: public Assembler {
// -------------------------------------------------------------------------
// Runtime calls.
// See comments at the beginning of CEntryStub::Generate.
// Sets up the argument registers that CEntryStub expects:
//   s0: number of arguments including receiver
//   s1: size of arguments excluding receiver,
//       i.e. (num_args - 1) * kPointerSize
inline void PrepareCEntryArgs(int num_args) {
li(s0, num_args);
li(s1, (num_args - 1) * kPointerSize);
}
// Loads the external reference |ref| into s2, the register in which
// CEntryStub expects the pointer to the builtin function.
inline void PrepareCEntryFunction(const ExternalReference& ref) {
li(s2, Operand(ref));
}
// Call a code stub.
void CallStub(CodeStub* stub, Condition cond = cc_always,
Register r1 = zero_reg, const Operand& r2 = Operand(zero_reg));
void CallStub(CodeStub* stub,
Condition cond = cc_always,
Register r1 = zero_reg,
const Operand& r2 = Operand(zero_reg),
BranchDelaySlot bd = PROTECT);
// Tail call a code stub (jump).
void TailCallStub(CodeStub* stub);
......@@ -1102,7 +1121,8 @@ class MacroAssembler: public Assembler {
// Convenience function: call an external reference.
void CallExternalReference(const ExternalReference& ext,
int num_arguments);
int num_arguments,
BranchDelaySlot bd = PROTECT);
// Tail call of a runtime routine (jump).
// Like JumpToExternalReference, but also takes care of passing the number
......@@ -1168,7 +1188,8 @@ class MacroAssembler: public Assembler {
void CallApiFunctionAndReturn(ExternalReference function, int stack_space);
// Jump to the builtin routine.
void JumpToExternalReference(const ExternalReference& builtin);
void JumpToExternalReference(const ExternalReference& builtin,
BranchDelaySlot bd = PROTECT);
// Invoke specified builtin JavaScript function. Adds an entry to
// the unresolved list if the name does not resolve.
......
......@@ -309,6 +309,14 @@ class Simulator {
void InstructionDecode(Instruction* instr);
// Execute one instruction placed in a branch delay slot.
void BranchDelayInstructionDecode(Instruction* instr) {
if (instr->InstructionBits() == nopInstr) {
// Short-cut generic nop instructions. They are always valid and they
// never change the simulator state.
set_register(pc, reinterpret_cast<int32_t>(instr) +
Instruction::kInstrSize);
return;
}
if (instr->IsForbiddenInBranchDelay()) {
V8_Fatal(__FILE__, __LINE__,
"Eror:Unexpected %i opcode in a branch delay slot.",
......
......@@ -577,8 +577,8 @@ static void CompileCallLoadPropertyWithInterceptor(
ExternalReference ref =
ExternalReference(IC_Utility(IC::kLoadPropertyWithInterceptorOnly),
masm->isolate());
__ li(a0, Operand(5));
__ li(a1, Operand(ref));
__ PrepareCEntryArgs(5);
__ PrepareCEntryFunction(ref);
CEntryStub stub(1);
__ CallStub(&stub);
......@@ -4107,7 +4107,8 @@ void KeyedLoadStubCompiler::GenerateLoadFastElement(MacroAssembler* masm) {
// have been verified by the caller to not be a smi.
// Check that the key is a smi.
__ JumpIfNotSmi(a0, &miss_force_generic);
__ JumpIfNotSmi(a0, &miss_force_generic, at, USE_DELAY_SLOT);
// The delay slot can be safely used here, a1 is an object pointer.
// Get the elements array.
__ lw(a2, FieldMemOperand(a1, JSObject::kElementsOffset));
......@@ -4115,7 +4116,7 @@ void KeyedLoadStubCompiler::GenerateLoadFastElement(MacroAssembler* masm) {
// Check that the key is within bounds.
__ lw(a3, FieldMemOperand(a2, FixedArray::kLengthOffset));
__ Branch(&miss_force_generic, hs, a0, Operand(a3));
__ Branch(USE_DELAY_SLOT, &miss_force_generic, hs, a0, Operand(a3));
// Load the result and make sure it's not the hole.
__ Addu(a3, a2, Operand(FixedArray::kHeaderSize - kHeapObjectTag));
......@@ -4125,8 +4126,8 @@ void KeyedLoadStubCompiler::GenerateLoadFastElement(MacroAssembler* masm) {
__ lw(t0, MemOperand(t0));
__ LoadRoot(t1, Heap::kTheHoleValueRootIndex);
__ Branch(&miss_force_generic, eq, t0, Operand(t1));
__ Ret(USE_DELAY_SLOT);
__ mov(v0, t0);
__ Ret();
__ bind(&miss_force_generic);
Handle<Code> stub =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment