Optimise Math.floor(x/y) to use integer division for specific divisor.

Landing for Rodolph Perfetta <rodolph.perfetta@gmail.com>. 

Original CL: http://codereview.chromium.org/9638018/
Review URL: https://chromiumcodereview.appspot.com/10197010

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11427 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent c25a92d7
......@@ -1315,6 +1315,75 @@ LInstruction* LChunkBuilder::DoDiv(HDiv* instr) {
}
bool LChunkBuilder::HasMagicNumberForDivisor(int32_t divisor) {
uint32_t divisor_abs = abs(divisor);
// Dividing by 0, 1, and powers of 2 is easy.
// Note that IsPowerOf2(0) returns true;
ASSERT(IsPowerOf2(0) == true);
if (IsPowerOf2(divisor_abs)) return true;
// We have magic numbers for a few specific divisors.
// Details and proofs can be found in:
// - Hacker's Delight, Henry S. Warren, Jr.
// - The PowerPC Compiler Writer’s Guide
// and probably many others.
//
// We handle
// <divisor with magic numbers> * <power of 2>
// but not
// <divisor with magic numbers> * <other divisor with magic numbers>
int32_t power_of_2_factor =
CompilerIntrinsics::CountTrailingZeros(divisor_abs);
DivMagicNumbers magic_numbers =
DivMagicNumberFor(divisor_abs >> power_of_2_factor);
if (magic_numbers.M != InvalidDivMagicNumber.M) return true;
return false;
}
HValue* LChunkBuilder::SimplifiedDividendForMathFloorOfDiv(HValue* dividend) {
// A value with an integer representation does not need to be transformed.
if (dividend->representation().IsInteger32()) {
return dividend;
// A change from an integer32 can be replaced by the integer32 value.
} else if (dividend->IsChange() &&
HChange::cast(dividend)->from().IsInteger32()) {
return HChange::cast(dividend)->value();
}
return NULL;
}
HValue* LChunkBuilder::SimplifiedDivisorForMathFloorOfDiv(HValue* divisor) {
// Only optimize when we have magic numbers for the divisor.
// The standard integer division routine is usually slower than transitionning
// to VFP.
if (divisor->IsConstant() &&
HConstant::cast(divisor)->HasInteger32Value()) {
HConstant* constant_val = HConstant::cast(divisor);
int32_t int32_val = constant_val->Integer32Value();
if (LChunkBuilder::HasMagicNumberForDivisor(int32_val)) {
return constant_val->CopyToRepresentation(Representation::Integer32());
}
}
return NULL;
}
LInstruction* LChunkBuilder::DoMathFloorOfDiv(HMathFloorOfDiv* instr) {
HValue* right = instr->right();
LOperand* dividend = UseRegister(instr->left());
LOperand* divisor = UseRegisterOrConstant(right);
LOperand* remainder = TempRegister();
ASSERT(right->IsConstant() &&
HConstant::cast(right)->HasInteger32Value() &&
HasMagicNumberForDivisor(HConstant::cast(right)->Integer32Value()));
return AssignEnvironment(DefineAsRegister(
new LMathFloorOfDiv(dividend, divisor, remainder)));
}
LInstruction* LChunkBuilder::DoMod(HMod* instr) {
if (instr->representation().IsInteger32()) {
ASSERT(instr->left()->representation().IsInteger32());
......
......@@ -132,6 +132,7 @@ class LCodeGen;
V(LoadNamedField) \
V(LoadNamedFieldPolymorphic) \
V(LoadNamedGeneric) \
V(MathFloorOfDiv) \
V(ModI) \
V(MulI) \
V(NumberTagD) \
......@@ -579,6 +580,21 @@ class LDivI: public LTemplateInstruction<1, 2, 0> {
};
class LMathFloorOfDiv: public LTemplateInstruction<1, 2, 1> {
public:
LMathFloorOfDiv(LOperand* left,
LOperand* right,
LOperand* temp = NULL) {
inputs_[0] = left;
inputs_[1] = right;
temps_[0] = temp;
}
DECLARE_CONCRETE_INSTRUCTION(MathFloorOfDiv, "math-floor-of-div")
DECLARE_HYDROGEN_ACCESSOR(MathFloorOfDiv)
};
class LMulI: public LTemplateInstruction<1, 2, 1> {
public:
LMulI(LOperand* left, LOperand* right, LOperand* temp) {
......@@ -2287,6 +2303,10 @@ class LChunkBuilder BASE_EMBEDDED {
HYDROGEN_CONCRETE_INSTRUCTION_LIST(DECLARE_DO)
#undef DECLARE_DO
static bool HasMagicNumberForDivisor(int32_t divisor);
static HValue* SimplifiedDividendForMathFloorOfDiv(HValue* val);
static HValue* SimplifiedDivisorForMathFloorOfDiv(HValue* val);
private:
enum Status {
UNUSED,
......
......@@ -1034,6 +1034,100 @@ void LCodeGen::DoModI(LModI* instr) {
}
void LCodeGen::EmitSignedIntegerDivisionByConstant(
Register result,
Register dividend,
int32_t divisor,
Register remainder,
Register scratch,
LEnvironment* environment) {
ASSERT(!AreAliased(dividend, scratch, ip));
ASSERT(LChunkBuilder::HasMagicNumberForDivisor(divisor));
uint32_t divisor_abs = abs(divisor);
int32_t power_of_2_factor =
CompilerIntrinsics::CountTrailingZeros(divisor_abs);
switch (divisor_abs) {
case 0:
DeoptimizeIf(al, environment);
return;
case 1:
if (divisor > 0) {
__ Move(result, dividend);
} else {
__ rsb(result, dividend, Operand(0), SetCC);
DeoptimizeIf(vs, environment);
}
// Compute the remainder.
__ mov(remainder, Operand(0));
return;
default:
if (IsPowerOf2(divisor_abs)) {
// Branch and condition free code for integer division by a power
// of two.
int32_t power = WhichPowerOf2(divisor_abs);
if (power > 1) {
__ mov(scratch, Operand(dividend, ASR, power - 1));
}
__ add(scratch, dividend, Operand(scratch, LSR, 32 - power));
__ mov(result, Operand(scratch, ASR, power));
// Negate if necessary.
// We don't need to check for overflow because the case '-1' is
// handled separately.
if (divisor < 0) {
ASSERT(divisor != -1);
__ rsb(result, result, Operand(0));
}
// Compute the remainder.
if (divisor > 0) {
__ sub(remainder, dividend, Operand(result, LSL, power));
} else {
__ add(remainder, dividend, Operand(result, LSL, power));
}
return;
} else {
// Use magic numbers for a few specific divisors.
// Details and proofs can be found in:
// - Hacker's Delight, Henry S. Warren, Jr.
// - The PowerPC Compiler Writer’s Guide
// and probably many others.
//
// We handle
// <divisor with magic numbers> * <power of 2>
// but not
// <divisor with magic numbers> * <other divisor with magic numbers>
DivMagicNumbers magic_numbers =
DivMagicNumberFor(divisor_abs >> power_of_2_factor);
// Branch and condition free code for integer division by a power
// of two.
const int32_t M = magic_numbers.M;
const int32_t s = magic_numbers.s + power_of_2_factor;
__ mov(ip, Operand(M));
__ smull(ip, scratch, dividend, ip);
if (M < 0) {
__ add(scratch, scratch, Operand(dividend));
}
if (s > 0) {
__ mov(scratch, Operand(scratch, ASR, s));
}
__ add(result, scratch, Operand(dividend, LSR, 31));
if (divisor < 0) __ rsb(result, result, Operand(0));
// Compute the remainder.
__ mov(ip, Operand(divisor));
// This sequence could be replaced with 'mls' when
// it gets implemented.
__ mul(scratch, result, ip);
__ sub(remainder, dividend, scratch);
}
}
}
void LCodeGen::DoDivI(LDivI* instr) {
class DeferredDivI: public LDeferredCode {
public:
......@@ -1115,6 +1209,34 @@ void LCodeGen::DoDivI(LDivI* instr) {
}
void LCodeGen::DoMathFloorOfDiv(LMathFloorOfDiv* instr) {
const Register result = ToRegister(instr->result());
const Register left = ToRegister(instr->InputAt(0));
const Register remainder = ToRegister(instr->TempAt(0));
const Register scratch = scratch0();
// We only optimize this for division by constants, because the standard
// integer division routine is usually slower than transitionning to VFP.
// This could be optimized on processors with SDIV available.
ASSERT(instr->InputAt(1)->IsConstantOperand());
int32_t divisor = ToInteger32(LConstantOperand::cast(instr->InputAt(1)));
if (divisor < 0) {
__ cmp(left, Operand(0));
DeoptimizeIf(eq, instr->environment());
}
EmitSignedIntegerDivisionByConstant(result,
left,
divisor,
remainder,
scratch,
instr->environment());
// We operated a truncating division. Correct the result if necessary.
__ cmp(remainder, Operand(0));
__ teq(remainder, Operand(divisor), ne);
__ sub(result, result, Operand(1), LeaveCC, mi);
}
template<int T>
void LCodeGen::DoDeferredBinaryOpStub(LTemplateInstruction<1, 2, T>* instr,
Token::Value op) {
......
......@@ -323,6 +323,17 @@ class LCodeGen BASE_EMBEDDED {
Register source,
int* offset);
// Emit optimized code for integer division.
// Inputs are signed.
// All registers are clobbered.
// If 'remainder' is no_reg, it is not computed.
void EmitSignedIntegerDivisionByConstant(Register result,
Register dividend,
int32_t divisor,
Register remainder,
Register scratch,
LEnvironment* environment);
struct JumpTableEntry {
explicit inline JumpTableEntry(Address entry)
: label(),
......
......@@ -3710,15 +3710,28 @@ void MacroAssembler::CheckEnumCache(Register null_value, Label* call_runtime) {
}
bool AreAliased(Register r1, Register r2, Register r3, Register r4) {
if (r1.is(r2)) return true;
if (r1.is(r3)) return true;
if (r1.is(r4)) return true;
if (r2.is(r3)) return true;
if (r2.is(r4)) return true;
if (r3.is(r4)) return true;
return false;
#ifdef DEBUG
bool AreAliased(Register reg1,
Register reg2,
Register reg3,
Register reg4,
Register reg5,
Register reg6) {
int n_of_valid_regs = reg1.is_valid() + reg2.is_valid() +
reg3.is_valid() + reg4.is_valid() + reg5.is_valid() + reg6.is_valid();
RegList regs = 0;
if (reg1.is_valid()) regs |= reg1.bit();
if (reg2.is_valid()) regs |= reg2.bit();
if (reg3.is_valid()) regs |= reg3.bit();
if (reg4.is_valid()) regs |= reg4.bit();
if (reg5.is_valid()) regs |= reg5.bit();
if (reg6.is_valid()) regs |= reg6.bit();
int n_of_non_aliasing_regs = NumRegs(regs);
return n_of_valid_regs != n_of_non_aliasing_regs;
}
#endif
CodePatcher::CodePatcher(byte* address, int instructions)
......
......@@ -85,7 +85,14 @@ enum SmiCheck { INLINE_SMI_CHECK, OMIT_SMI_CHECK };
enum LinkRegisterStatus { kLRHasNotBeenSaved, kLRHasBeenSaved };
bool AreAliased(Register r1, Register r2, Register r3, Register r4);
#ifdef DEBUG
bool AreAliased(Register reg1,
Register reg2,
Register reg3 = no_reg,
Register reg4 = no_reg,
Register reg5 = no_reg,
Register reg6 = no_reg);
#endif
// MacroAssembler implements a collection of frequently used macros.
......
......@@ -40,6 +40,9 @@ class CompilerIntrinsics {
// Returns number of zero bits following most significant 1 bit.
// Undefined for zero value.
INLINE(static int CountLeadingZeros(uint32_t value));
// Returns the number of bits set.
INLINE(static int CountSetBits(uint32_t value));
};
#ifdef __GNUC__
......@@ -51,6 +54,10 @@ int CompilerIntrinsics::CountLeadingZeros(uint32_t value) {
return __builtin_clz(value);
}
int CompilerIntrinsics::CountSetBits(uint32_t value) {
return __builtin_popcount(value);
}
#elif defined(_MSC_VER)
#pragma intrinsic(_BitScanForward)
......@@ -68,6 +75,23 @@ int CompilerIntrinsics::CountLeadingZeros(uint32_t value) {
return 31 - static_cast<int>(result);
}
int CompilerIntrinsics::CountSetBits(uint32_t value) {
// __popcnt is only supported from VS2008.
#define _MSC_VER_VS2008 1500
#if _MSC_VER >= _MSC_VER_VS2008
return __popcnt(value);
#else
// Manually count set bits.
value = ((value >> 1) & 0x55555555) + (value & 0x55555555);
value = ((value >> 2) & 0x33333333) + (value & 0x33333333);
value = ((value >> 4) & 0x0f0f0f0f) + (value & 0x0f0f0f0f);
value = ((value >> 8) & 0x00ff00ff) + (value & 0x00ff00ff);
value = ((value >> 16) & 0x0000ffff) + (value & 0x0000ffff);
return value;
#endif
#undef _MSC_VER_VS2008
}
#else
#error Unsupported compiler
#endif
......
......@@ -1359,12 +1359,7 @@ InnerPointerToCodeCache::InnerPointerToCodeCacheEntry*
// -------------------------------------------------------------------------
int NumRegs(RegList reglist) {
int n = 0;
while (reglist != 0) {
n++;
reglist &= reglist - 1; // clear one bit
}
return n;
return CompilerIntrinsics::CountSetBits(reglist);
}
......
......@@ -931,6 +931,62 @@ void HJSArrayLength::PrintDataTo(StringStream* stream) {
}
HValue* HUnaryMathOperation::Canonicalize() {
if (op() == kMathFloor) {
// If the input is integer32 then we replace the floor instruction
// with its input. This happens before the representation changes are
// introduced.
if (value()->representation().IsInteger32()) return value();
#ifdef V8_TARGET_ARCH_ARM
if (value()->IsDiv() && (value()->UseCount() == 1)) {
// TODO(2038): Implement this optimization for non ARM architectures.
HDiv* hdiv = HDiv::cast(value());
HValue* left = hdiv->left();
HValue* right = hdiv->right();
// Try to simplify left and right values of the division.
HValue* new_left =
LChunkBuilder::SimplifiedDividendForMathFloorOfDiv(left);
HValue* new_right =
LChunkBuilder::SimplifiedDivisorForMathFloorOfDiv(right);
// Return if left or right are not optimizable.
if ((new_left == NULL) || (new_right == NULL)) return this;
// Insert the new values in the graph.
if (new_left->IsInstruction() &&
!HInstruction::cast(new_left)->IsLinked()) {
HInstruction::cast(new_left)->InsertBefore(this);
}
if (new_right->IsInstruction() &&
!HInstruction::cast(new_right)->IsLinked()) {
HInstruction::cast(new_right)->InsertBefore(this);
}
HMathFloorOfDiv* instr = new HMathFloorOfDiv(context(),
new_left,
new_right);
// Replace this HMathFloor instruction by the new HMathFloorOfDiv.
instr->InsertBefore(this);
ReplaceAllUsesWith(instr);
Kill();
// We know the division had no other uses than this HMathFloor. Delete it.
// Also delete the arguments of the division if they are not used any
// more.
hdiv->DeleteAndReplaceWith(NULL);
ASSERT(left->IsChange() || left->IsConstant());
ASSERT(right->IsChange() || right->IsConstant());
if (left->HasNoUses()) left->DeleteAndReplaceWith(NULL);
if (right->HasNoUses()) right->DeleteAndReplaceWith(NULL);
// Return NULL to remove this instruction from the graph.
return NULL;
}
#endif // V8_TARGET_ARCH_ARM
}
return this;
}
HValue* HCheckInstanceType::Canonicalize() {
if (check_ == IS_STRING &&
!value()->type().IsUninitialized() &&
......
......@@ -140,6 +140,7 @@ class LChunkBuilder;
V(LoadNamedField) \
V(LoadNamedFieldPolymorphic) \
V(LoadNamedGeneric) \
V(MathFloorOfDiv) \
V(Mod) \
V(Mul) \
V(ObjectLiteral) \
......@@ -1992,15 +1993,7 @@ class HUnaryMathOperation: public HTemplateInstruction<2> {
}
}
virtual HValue* Canonicalize() {
// If the input is integer32 then we replace the floor instruction
// with its inputs. This happens before the representation changes are
// introduced.
if (op() == kMathFloor) {
if (value()->representation().IsInteger32()) return value();
}
return this;
}
virtual HValue* Canonicalize();
BuiltinFunctionId op() const { return op_; }
const char* OpName() const;
......@@ -2758,6 +2751,25 @@ class HBitwiseBinaryOperation: public HBinaryOperation {
};
class HMathFloorOfDiv: public HBinaryOperation {
public:
HMathFloorOfDiv(HValue* context, HValue* left, HValue* right)
: HBinaryOperation(context, left, right) {
set_representation(Representation::Integer32());
SetFlag(kUseGVN);
}
virtual Representation RequiredInputRepresentation(int index) {
return Representation::Integer32();
}
DECLARE_CONCRETE_INSTRUCTION(MathFloorOfDiv)
protected:
virtual bool DataEquals(HValue* other) { return true; }
};
class HArithmeticBinaryOperation: public HBinaryOperation {
public:
HArithmeticBinaryOperation(HValue* context, HValue* left, HValue* right)
......
......@@ -1355,6 +1355,12 @@ LInstruction* LChunkBuilder::DoDiv(HDiv* instr) {
}
LInstruction* LChunkBuilder::DoMathFloorOfDiv(HMathFloorOfDiv* instr) {
UNIMPLEMENTED();
return NULL;
}
LInstruction* LChunkBuilder::DoMod(HMod* instr) {
if (instr->representation().IsInteger32()) {
ASSERT(instr->left()->representation().IsInteger32());
......
......@@ -1316,6 +1316,12 @@ LInstruction* LChunkBuilder::DoDiv(HDiv* instr) {
}
LInstruction* LChunkBuilder::DoMathFloorOfDiv(HMathFloorOfDiv* instr) {
UNIMPLEMENTED();
return NULL;
}
LInstruction* LChunkBuilder::DoMod(HMod* instr) {
if (instr->representation().IsInteger32()) {
ASSERT(instr->left()->representation().IsInteger32());
......
......@@ -89,4 +89,19 @@ char* SimpleStringBuilder::Finalize() {
return buffer_.start();
}
const DivMagicNumbers DivMagicNumberFor(int32_t divisor) {
switch (divisor) {
case 3: return DivMagicNumberFor3;
case 5: return DivMagicNumberFor5;
case 7: return DivMagicNumberFor7;
case 9: return DivMagicNumberFor9;
case 11: return DivMagicNumberFor11;
case 25: return DivMagicNumberFor25;
case 125: return DivMagicNumberFor125;
case 625: return DivMagicNumberFor625;
default: return InvalidDivMagicNumber;
}
}
} } // namespace v8::internal
......@@ -85,6 +85,32 @@ inline int WhichPowerOf2(uint32_t x) {
}
// Magic numbers for integer division.
// These are kind of 2's complement reciprocal of the divisors.
// Details and proofs can be found in:
// - Hacker's Delight, Henry S. Warren, Jr.
// - The PowerPC Compiler Writer’s Guide
// and probably many others.
// See details in the implementation of the algorithm in
// lithium-codegen-arm.cc : LCodeGen::TryEmitSignedIntegerDivisionByConstant().
struct DivMagicNumbers {
unsigned M;
unsigned s;
};
const DivMagicNumbers InvalidDivMagicNumber= {0, 0};
const DivMagicNumbers DivMagicNumberFor3 = {0x55555556, 0};
const DivMagicNumbers DivMagicNumberFor5 = {0x66666667, 1};
const DivMagicNumbers DivMagicNumberFor7 = {0x92492493, 2};
const DivMagicNumbers DivMagicNumberFor9 = {0x38e38e39, 1};
const DivMagicNumbers DivMagicNumberFor11 = {0x2e8ba2e9, 1};
const DivMagicNumbers DivMagicNumberFor25 = {0x51eb851f, 3};
const DivMagicNumbers DivMagicNumberFor125 = {0x10624dd3, 3};
const DivMagicNumbers DivMagicNumberFor625 = {0x68db8bad, 8};
const DivMagicNumbers DivMagicNumberFor(int32_t divisor);
// The C++ standard leaves the semantics of '>>' undefined for
// negative signed operands. Most implementations do the right thing,
// though.
......
......@@ -1305,6 +1305,12 @@ LInstruction* LChunkBuilder::DoDiv(HDiv* instr) {
}
LInstruction* LChunkBuilder::DoMathFloorOfDiv(HMathFloorOfDiv* instr) {
UNIMPLEMENTED();
return NULL;
}
LInstruction* LChunkBuilder::DoMod(HMod* instr) {
if (instr->representation().IsInteger32()) {
ASSERT(instr->left()->representation().IsInteger32());
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment