Implement function calls on ARM using the blx instruction when

available. Using blx will allow the CPU to predict the return address of the function, resulting in better overall performance. This is a copy of http://codereview.chromium.org/1113002 by rodolph.perfetta@googlemail.com git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4198 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

Implement function calls on ARM using the blx instruction when
available. Using blx will allow the CPU to predict the return address of the function, resulting in better overall performance. This is a copy of http://codereview.chromium.org/1113002 by rodolph.perfetta@googlemail.com git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4198 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
1b7df23f · erik.corry@gmail.com · 89b04089 · 1b7df23f · 1b7df23f · 1b7df23f
Commit 1b7df23f authored Mar 19, 2010 by erik.corry@gmail.com
6 changed files
--- a/src/arm/assembler-arm-inl.h
+++ b/src/arm/assembler-arm-inl.h
@@ -144,12 +144,21 @@ void RelocInfo::set_call_object(Object* target) {


 bool RelocInfo::IsPatchedReturnSequence() {
-  // On ARM a "call instruction" is actually two instructions.
-  //   mov lr, pc
-  //   ldr pc, [pc, #XXX]
-  return (Assembler::instr_at(pc_) == kMovLrPc)
-          && ((Assembler::instr_at(pc_ + Assembler::kInstrSize) & kLdrPCPattern)
-              == kLdrPCPattern);
+  Instr current_instr = Assembler::instr_at(pc_);
+  Instr next_instr = Assembler::instr_at(pc_ + Assembler::kInstrSize);
+#ifdef USE_BLX
+  // A patched return sequence is:
+  //  ldr ip, [pc, #0]
+  //  blx ip
+  return ((current_instr & kLdrPCMask) == kLdrPCPattern)
+          && ((next_instr & kBlxRegMask) == kBlxRegPattern);
+#else
+  // A patched return sequence is:
+  //  mov lr, pc
+  //  ldr pc, [pc, #-4]
+  return (current_instr == kMovLrPc)
+          && ((next_instr & kLdrPCMask) == kLdrPCPattern);
+#endif
 }


@@ -225,6 +234,16 @@ Address Assembler::target_address_address_at(Address pc) {
    target_pc -= kInstrSize;
    instr = Memory::int32_at(target_pc);
  }
+
+#ifdef USE_BLX
+  // If we have a blx instruction, the instruction before it is
+  // what needs to be patched.
+  if ((instr & kBlxRegMask) == kBlxRegPattern) {
+    target_pc -= kInstrSize;
+    instr = Memory::int32_at(target_pc);
+  }
+#endif
+
  // Verify that the instruction to patch is a
  // ldr<cond> <Rd>, [pc +/- offset_12].
  ASSERT((instr & 0x0f7f0000) == 0x051f0000);

--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -240,8 +240,14 @@ static const Instr kPopRegPattern =
    al | B26 | L | 4 | PostIndex | sp.code() * B16;
 // mov lr, pc
 const Instr kMovLrPc = al | 13*B21 | pc.code() | lr.code() * B12;
-// ldr pc, [pc, #XXX]
-const Instr kLdrPCPattern = al | B26 | L | pc.code() * B16;
+// ldr rd, [pc, #offset]
+const Instr kLdrPCMask = CondMask | 15 * B24 | 7 * B20 | 15 * B16;
+const Instr kLdrPCPattern = al | 5 * B24 | L | pc.code() * B16;
+// blxcc rm
+const Instr kBlxRegMask =
+    15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
+const Instr kBlxRegPattern =
+    B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | 3 * B4;

 // Spare buffer.
 static const int kMinimalBufferSize = 4*KB;

--- a/src/arm/assembler-arm.h
+++ b/src/arm/assembler-arm.h
@@ -509,7 +509,10 @@ typedef int32_t Instr;


 extern const Instr kMovLrPc;
+extern const Instr kLdrPCMask;
 extern const Instr kLdrPCPattern;
+extern const Instr kBlxRegMask;
+extern const Instr kBlxRegPattern;


 class Assembler : public Malloced {
@@ -590,12 +593,34 @@ class Assembler : public Malloced {
  static const int kInstrSize = sizeof(Instr);

  // Distance between the instruction referring to the address of the call
-  // target (ldr pc, [target addr in const pool]) and the return address
+  // target and the return address.
+#ifdef USE_BLX
+  // Call sequence is:
+  //  ldr  ip, [pc, #...] @ call address
+  //  blx  ip
+  //                      @ return address
+  static const int kCallTargetAddressOffset = 2 * kInstrSize;
+#else
+  // Call sequence is:
+  //  mov  lr, pc
+  //  ldr  pc, [pc, #...] @ call address
+  //                      @ return address
  static const int kCallTargetAddressOffset = kInstrSize;
+#endif

  // Distance between start of patched return sequence and the emitted address
  // to jump to.
-  static const int kPatchReturnSequenceAddressOffset = kInstrSize;
+#ifdef USE_BLX
+  // Return sequence is:
+  //  ldr  ip, [pc, #0]   @ emitted address and start
+  //  blx  ip
+  static const int kPatchReturnSequenceAddressOffset =  0 * kInstrSize;
+#else
+  // Return sequence is:
+  //  mov  lr, pc         @ start of sequence
+  //  ldr  pc, [pc, #-4]  @ emitted address
+  static const int kPatchReturnSequenceAddressOffset =  kInstrSize;
+#endif

  // Difference between address of current opcode and value read from pc
  // register.

--- a/src/arm/constants-arm.h
+++ b/src/arm/constants-arm.h
@@ -72,6 +72,11 @@
 # define CAN_USE_THUMB_INSTRUCTIONS 1
 #endif

+// Using blx may yield better code, so use it when required or when available
+#if defined(USE_THUMB_INTERWORK) || defined(CAN_USE_ARMV5_INSTRUCTIONS)
+#define USE_BLX 1
+#endif
+
 namespace assembler {
 namespace arm {


--- a/src/arm/debug-arm.cc
+++ b/src/arm/debug-arm.cc
@@ -46,13 +46,23 @@ void BreakLocationIterator::SetDebugBreakAtReturn() {
  //   add sp, sp, #4
  //   bx lr
  // to a call to the debug break return code.
+  // #if USE_BLX
+  //   ldr ip, [pc, #0]
+  //   blx ip
+  // #else
  //   mov lr, pc
  //   ldr pc, [pc, #-4]
+  // #endif
  //   <debug break return code entry point address>
  //   bktp 0
  CodePatcher patcher(rinfo()->pc(), 4);
+#ifdef USE_BLX
+  patcher.masm()->ldr(v8::internal::ip, MemOperand(v8::internal::pc, 0));
+  patcher.masm()->blx(v8::internal::ip);
+#else
  patcher.masm()->mov(v8::internal::lr, v8::internal::pc);
  patcher.masm()->ldr(v8::internal::pc, MemOperand(v8::internal::pc, -4));
+#endif
  patcher.Emit(Debug::debug_break_return()->entry());
  patcher.masm()->bkpt(0);
 }

--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -58,11 +58,6 @@ MacroAssembler::MacroAssembler(void* buffer, int size)
 #endif


-// Using blx may yield better code, so use it when required or when available
-#if defined(USE_THUMB_INTERWORK) || defined(CAN_USE_ARMV5_INSTRUCTIONS)
-#define USE_BLX 1
-#endif
-
 // Using bx does not yield better code, so use it only when required
 #if defined(USE_THUMB_INTERWORK)
 #define USE_BX 1
@@ -117,16 +112,33 @@ void MacroAssembler::Call(Register target, Condition cond) {

 void MacroAssembler::Call(intptr_t target, RelocInfo::Mode rmode,
                          Condition cond) {
+#if USE_BLX
+  // On ARMv5 and after the recommended call sequence is:
+  //  ldr ip, [pc, #...]
+  //  blx ip
+
+  // The two instructions (ldr and blx) could be separated by a literal
+  // pool and the code would still work. The issue comes from the
+  // patching code which expects the ldr to be just above the blx.
+  BlockConstPoolFor(2);
+  // Statement positions are expected to be recorded when the target
+  // address is loaded. The mov method will automatically record
+  // positions when pc is the target; since this is not the case here,
+  // we have to do it explicitly.
+  WriteRecordedPositions();
+
+  mov(ip, Operand(target, rmode), LeaveCC, cond);
+  blx(ip, cond);
+
+  ASSERT(kCallTargetAddressOffset == 2 * kInstrSize);
+#else
  // Set lr for return at current pc + 8.
  mov(lr, Operand(pc), LeaveCC, cond);
  // Emit a ldr<cond> pc, [pc + offset of target in constant pool].
  mov(pc, Operand(target, rmode), LeaveCC, cond);
-  // If USE_BLX is defined, we could emit a 'mov ip, target', followed by a
-  // 'blx ip'; however, the code would not be shorter than the above sequence
-  // and the target address of the call would be referenced by the first
-  // instruction rather than the second one, which would make it harder to patch
-  // (two instructions before the return address, instead of one).
+
  ASSERT(kCallTargetAddressOffset == kInstrSize);
+#endif
 }