Commit 91ec654b authored by danno, committed by Commit bot

[turbofan]: Use "leal" more pervasively on x64

Only use "addl" and "subl" in cases that have been measured to be
faster (currently only operations with immediate operands).

Review URL: https://codereview.chromium.org/735293004

Cr-Commit-Position: refs/heads/master@{#25580}
parent 82d0f800
@@ -590,10 +590,26 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
         __ movsd(operand, i.InputDoubleRegister(index));
       }
       break;
-    case kX64Lea32:
-      __ leal(i.OutputRegister(), i.MemoryOperand());
+    case kX64Lea32: {
+      AddressingMode mode = AddressingModeField::decode(instr->opcode());
+      // Shorten "leal" to "addl" or "subl" if the register allocation just
+      // happens to work out for operations with immediate operands where the
+      // non-constant input register is the same as the output register. The
+      // "addl"/"subl" forms in these cases are faster based on empirical
+      // measurements.
+      if (mode == kMode_MRI && i.InputRegister(0).is(i.OutputRegister())) {
+        int32_t constant_summand = i.InputInt32(1);
+        if (constant_summand > 0) {
+          __ addl(i.OutputRegister(), Immediate(constant_summand));
+        } else if (constant_summand < 0) {
+          __ subl(i.OutputRegister(), Immediate(-constant_summand));
+        }
+      } else {
+        __ leal(i.OutputRegister(), i.MemoryOperand());
+      }
       __ AssertZeroExtended(i.OutputRegister());
       break;
+    }
     case kX64Lea:
       __ leaq(i.OutputRegister(), i.MemoryOperand());
       break;
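For illustration, here is a minimal standalone sketch of the peephole above; the names (Op, SelectLea32Form) are hypothetical and not part of V8:

#include <cstdint>

// Given a request for "leal dst, [src + imm]", pick the cheaper two-operand
// form when the register allocator happened to assign dst == src.
enum class Op { kLeal, kAddl, kSubl, kNone };

Op SelectLea32Form(int dst, int src, int32_t imm) {
  if (dst == src) {
    if (imm > 0) return Op::kAddl;  // addl dst, imm
    if (imm < 0) return Op::kSubl;  // subl dst, -imm (like the V8 code, this
                                    // would overflow for INT32_MIN)
    return Op::kNone;               // "leal dst, [dst + 0]" is a no-op
  }
  return Op::kLeal;                 // distinct registers need the 3-operand leal
}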
@@ -430,33 +430,6 @@ void InstructionSelector::VisitInt32Add(Node* node) {
   // case that there are only two operands to the add and one of them isn't
   // live, use a plain "addl".
   if (m.matches() && (m.constant() == NULL || g.CanBeImmediate(m.constant()))) {
-    if (m.offset() != NULL) {
-      if (m.constant() == NULL) {
-        if (m.scaled() != NULL && m.scale_exponent() == 0) {
-          if (!IsLive(m.offset())) {
-            Emit(kX64Add32, g.DefineSameAsFirst(node),
-                 g.UseRegister(m.offset()), g.Use(m.scaled()));
-            return;
-          } else if (!IsLive(m.scaled())) {
-            Emit(kX64Add32, g.DefineSameAsFirst(node),
-                 g.UseRegister(m.scaled()), g.Use(m.offset()));
-            return;
-          }
-        }
-      }
-    } else {
-      if (m.scale_exponent() == 0) {
-        if (m.scaled() == NULL || m.offset() == NULL) {
-          Node* non_constant = m.scaled() == NULL ? m.offset() : m.scaled();
-          if (!IsLive(non_constant)) {
-            Emit(kX64Add32, g.DefineSameAsFirst(node),
-                 g.UseRegister(non_constant), g.UseImmediate(m.constant()));
-            return;
-          }
-        }
-      }
-    }
-  }
     InstructionOperand* inputs[4];
     size_t input_count = 0;
     AddressingMode mode = GenerateMemoryOperandInputs(
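With the special cases gone, every matched 32-bit add flows into the generic path above, where the shape of the operands picks the addressing mode. A rough sketch of that classification, reusing V8's kMode_* naming but with hypothetical selection logic (the real work happens in GenerateMemoryOperandInputs):

// Rough sketch only: map the shape of a matched add (base plus optional
// index and/or immediate displacement) onto a leal addressing mode.
enum class Mode { kMR1, kMRI, kMR1I };

Mode ClassifyAdd(bool has_index, bool has_immediate) {
  if (has_index && has_immediate) return Mode::kMR1I;  // leal d, [b + i*1 + imm]
  if (has_index) return Mode::kMR1;                    // leal d, [b + i*1]
  return Mode::kMRI;                                   // leal d, [b + imm]
}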
@@ -491,15 +464,12 @@ void InstructionSelector::VisitInt32Sub(Node* node) {
     Emit(kX64Neg32, g.DefineSameAsFirst(node), g.UseRegister(m.right().node()));
   } else {
     if (m.right().HasValue() && g.CanBeImmediate(m.right().node())) {
-      if (IsLive(m.left().node())) {
-        // Special handling for subtraction of constants where the non-constant
-        // input is used elsewhere. To eliminate the gap move before the sub to
-        // copy the destination register, use a "leal" instead.
-        Emit(kX64Lea32 | AddressingModeField::encode(kMode_MRI),
-             g.DefineAsRegister(node), g.UseRegister(m.left().node()),
-             g.TempImmediate(-m.right().Value()));
-        return;
-      }
+      // Turn subtractions of constant values into immediate "leal" instructions
+      // by negating the value.
+      Emit(kX64Lea32 | AddressingModeField::encode(kMode_MRI),
+           g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.TempImmediate(-m.right().Value()));
+      return;
     }
     VisitBinop(this, node, kX64Sub32);
   }
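The selector side of the change leans on the identity x - c == x + (-c): a subtraction by a constant is emitted as a "leal" with a negated displacement (kMode_MRI), which the codegen peephole shown earlier may shorten back to a "subl". A small illustration; LeaDisplacementForSub is a hypothetical helper, not V8 code:

#include <cstdint>
#include <cstdio>

// "x - c" is encoded as "leal dst, [x + (-c)]". Note -c overflows for
// INT32_MIN; whether that value can reach this point depends on upstream
// checks this sketch does not model.
int32_t LeaDisplacementForSub(int32_t c) { return -c; }

int main() {
  // e.g. Int32Sub(p0, 15) -> kX64Lea32 | kMode_MRI with inputs (p0, -15)
  std::printf("displacement for 'x - 15': %d\n", LeaDisplacementForSub(15));
  return 0;
}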
@@ -265,14 +265,18 @@ TEST_F(InstructionSelectorTest, Int32AddConstantAsLeaSingle) {
   StreamBuilder m(this, kMachInt32, kMachInt32);
   Node* const p0 = m.Parameter(0);
   Node* const c0 = m.Int32Constant(15);
-  // If there is only a single use of an add's input, use an "addl" not a
-  // "leal", it is faster.
+  // If one of the add's operands is only used once, use a "leal" even though
+  // an "addl" could be used. The "leal" has proven faster; our best guess is
+  // that it gives the register allocator more freedom and doesn't set flags,
+  // reducing pressure in the CPU's pipeline. If we're lucky with register
+  // allocation, code generation will later select an "addl" in the cases
+  // that have been measured to be faster.
   Node* const v0 = m.Int32Add(p0, c0);
   m.Return(v0);
   Stream s = m.Build();
   ASSERT_EQ(1U, s.size());
-  EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
-  EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+  EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
   ASSERT_EQ(2U, s[0]->InputCount());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
   EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
@@ -284,12 +288,13 @@ TEST_F(InstructionSelectorTest, Int32AddConstantAsAdd) {
   Node* const p0 = m.Parameter(0);
   Node* const c0 = m.Int32Constant(1);
   // If there is only a single use of an add's input and the immediate constant
-  // for the add is 1, use inc.
+  // for the add is 1, don't use an "inc". It is much slower on modern Intel
+  // architectures.
   m.Return(m.Int32Add(p0, c0));
   Stream s = m.Build();
   ASSERT_EQ(1U, s.size());
-  EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
-  EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+  EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
   ASSERT_EQ(2U, s[0]->InputCount());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
   EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
@@ -317,12 +322,17 @@ TEST_F(InstructionSelectorTest, Int32AddCommutedConstantAsLeaSingle) {
   StreamBuilder m(this, kMachInt32, kMachInt32);
   Node* const p0 = m.Parameter(0);
   Node* const c0 = m.Int32Constant(15);
-  // If there is only a single use of an add's input, use "addl"
+  // If one of the add's operands is only used once, use a "leal" even though
+  // an "addl" could be used. The "leal" has proven faster; our best guess is
+  // that it gives the register allocator more freedom and doesn't set flags,
+  // reducing pressure in the CPU's pipeline. If we're lucky with register
+  // allocation, code generation will later select an "addl" in the cases
+  // that have been measured to be faster.
   m.Return(m.Int32Add(c0, p0));
   Stream s = m.Build();
   ASSERT_EQ(1U, s.size());
-  EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
-  EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+  EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
   ASSERT_EQ(2U, s[0]->InputCount());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
   EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
@@ -351,12 +361,17 @@ TEST_F(InstructionSelectorTest, Int32AddSimpleAsAdd) {
   StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
   Node* const p0 = m.Parameter(0);
   Node* const p1 = m.Parameter(1);
-  // If one of the add's operands is only used once, use an "addl".
+  // If one of the add's operands is only used once, use a "leal" even though
+  // an "addl" could be used. The "leal" has proven faster; our best guess is
+  // that it gives the register allocator more freedom and doesn't set flags,
+  // reducing pressure in the CPU's pipeline. If we're lucky with register
+  // allocation, code generation will later select an "addl" in the cases
+  // that have been measured to be faster.
   m.Return(m.Int32Add(p0, p1));
   Stream s = m.Build();
   ASSERT_EQ(1U, s.size());
-  EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
-  EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+  EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_MR1, s[0]->addressing_mode());
   ASSERT_EQ(2U, s[0]->InputCount());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
   EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
@@ -715,8 +730,8 @@ TEST_F(InstructionSelectorTest, Int32SubConstantAsSub) {
   m.Return(m.Int32Sub(p0, c0));
   Stream s = m.Build();
   ASSERT_EQ(1U, s.size());
-  EXPECT_EQ(kX64Sub32, s[0]->arch_opcode());
-  EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+  EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
   ASSERT_EQ(2U, s[0]->InputCount());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
   EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
@@ -759,7 +774,7 @@ TEST_F(InstructionSelectorTest, Int32AddScaled2Other) {
   EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
   EXPECT_EQ(s.ToVreg(a0), s.ToVreg(s[0]->OutputAt(0)));
   ASSERT_EQ(2U, s[1]->InputCount());
-  EXPECT_EQ(kX64Add32, s[1]->arch_opcode());
+  EXPECT_EQ(kX64Lea32, s[1]->arch_opcode());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[1]->InputAt(0)));
   EXPECT_EQ(s.ToVreg(a0), s.ToVreg(s[1]->InputAt(1)));
   EXPECT_EQ(s.ToVreg(a1), s.ToVreg(s[1]->OutputAt(0)));