Commit b702cd97 authored by martyn.capewell, committed by Commit bot

[turbofan] Prefer add/shift over madd on ARM64

Before selecting multiply-accumulate for a multiplication combined with an add
operation, check that the multiply can't be reduced to an add-with-shift. This
prevents simple multiplications by 3, 5, etc. from turning into register moves
and madd instructions.

Review URL: https://codereview.chromium.org/1180863002

Cr-Commit-Position: refs/heads/master@{#28976}
parent 06307997
@@ -284,6 +284,22 @@ void VisitAddSub(InstructionSelector* selector, Node* node, ArchOpcode opcode,
}
}
// For multiplications by immediate of the form x * (2^k + 1), where k > 0,
// return the value of k, otherwise return zero. This is used to reduce the
// multiplication to addition with left shift: x + (x << k).
template <typename Matcher>
int32_t LeftShiftForReducedMultiply(Matcher* m) {
DCHECK(m->IsInt32Mul() || m->IsInt64Mul());
if (m->right().HasValue() && m->right().Value() >= 3) {
uint64_t value_minus_one = m->right().Value() - 1;
if (base::bits::IsPowerOfTwo64(value_minus_one)) {
return WhichPowerOf2_64(value_minus_one);
}
}
return 0;
}
} // namespace
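
(Editorial aside, not part of the diff.) The rewrite relies on the identity
x * (2^k + 1) == x + (x << k) in modular integer arithmetic, which is what the
selected ARM64 add with a shifted register operand computes; a minimal
self-contained check of that assumption:

#include <cassert>
#include <cstdint>

int main() {
  // Spot-check the identity in wrap-around uint32_t arithmetic over the
  // same shift range the 32-bit unit tests below use (k = 1..30).
  const uint32_t xs[] = {0u, 1u, 3u, 12345u, 0x7FFFFFFFu, 0xFFFFFFFFu};
  for (int k = 1; k <= 30; ++k) {
    const uint32_t multiplier = (1u << k) + 1u;
    for (uint32_t x : xs) {
      assert(x * multiplier == x + (x << k));
    }
  }
  return 0;
}
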
@@ -843,18 +859,26 @@ void InstructionSelector::VisitInt32Add(Node* node) {
// Select Madd(x, y, z) for Add(Mul(x, y), z).
if (m.left().IsInt32Mul() && CanCover(node, m.left().node())) {
Int32BinopMatcher mleft(m.left().node());
Emit(kArm64Madd32, g.DefineAsRegister(node),
g.UseRegister(mleft.left().node()),
g.UseRegister(mleft.right().node()), g.UseRegister(m.right().node()));
return;
// Check multiply can't be later reduced to addition with shift.
if (LeftShiftForReducedMultiply(&mleft) == 0) {
Emit(kArm64Madd32, g.DefineAsRegister(node),
g.UseRegister(mleft.left().node()),
g.UseRegister(mleft.right().node()),
g.UseRegister(m.right().node()));
return;
}
}
// Select Madd(x, y, z) for Add(x, Mul(x, y)).
// Select Madd(x, y, z) for Add(z, Mul(x, y)).
if (m.right().IsInt32Mul() && CanCover(node, m.right().node())) {
Int32BinopMatcher mright(m.right().node());
Emit(kArm64Madd32, g.DefineAsRegister(node),
g.UseRegister(mright.left().node()),
g.UseRegister(mright.right().node()), g.UseRegister(m.left().node()));
return;
// Check multiply can't be later reduced to addition with shift.
if (LeftShiftForReducedMultiply(&mright) == 0) {
Emit(kArm64Madd32, g.DefineAsRegister(node),
g.UseRegister(mright.left().node()),
g.UseRegister(mright.right().node()),
g.UseRegister(m.left().node()));
return;
}
}
VisitAddSub<Int32BinopMatcher>(this, node, kArm64Add32, kArm64Sub32);
}
@@ -866,18 +890,26 @@ void InstructionSelector::VisitInt64Add(Node* node) {
// Select Madd(x, y, z) for Add(Mul(x, y), z).
if (m.left().IsInt64Mul() && CanCover(node, m.left().node())) {
Int64BinopMatcher mleft(m.left().node());
Emit(kArm64Madd, g.DefineAsRegister(node),
g.UseRegister(mleft.left().node()),
g.UseRegister(mleft.right().node()), g.UseRegister(m.right().node()));
return;
// Check multiply can't be later reduced to addition with shift.
if (LeftShiftForReducedMultiply(&mleft) == 0) {
Emit(kArm64Madd, g.DefineAsRegister(node),
g.UseRegister(mleft.left().node()),
g.UseRegister(mleft.right().node()),
g.UseRegister(m.right().node()));
return;
}
}
// Select Madd(x, y, z) for Add(x, Mul(x, y)).
// Select Madd(x, y, z) for Add(z, Mul(x, y)).
if (m.right().IsInt64Mul() && CanCover(node, m.right().node())) {
Int64BinopMatcher mright(m.right().node());
Emit(kArm64Madd, g.DefineAsRegister(node),
g.UseRegister(mright.left().node()),
g.UseRegister(mright.right().node()), g.UseRegister(m.left().node()));
return;
// Check multiply can't be later reduced to addition with shift.
if (LeftShiftForReducedMultiply(&mright) == 0) {
Emit(kArm64Madd, g.DefineAsRegister(node),
g.UseRegister(mright.left().node()),
g.UseRegister(mright.right().node()),
g.UseRegister(m.left().node()));
return;
}
}
VisitAddSub<Int64BinopMatcher>(this, node, kArm64Add, kArm64Sub);
}
@@ -887,13 +919,17 @@ void InstructionSelector::VisitInt32Sub(Node* node) {
Arm64OperandGenerator g(this);
Int32BinopMatcher m(node);
// Select Msub(a, x, y) for Sub(a, Mul(x, y)).
// Select Msub(x, y, a) for Sub(a, Mul(x, y)).
if (m.right().IsInt32Mul() && CanCover(node, m.right().node())) {
Int32BinopMatcher mright(m.right().node());
Emit(kArm64Msub32, g.DefineAsRegister(node),
g.UseRegister(mright.left().node()),
g.UseRegister(mright.right().node()), g.UseRegister(m.left().node()));
return;
// Check multiply can't be later reduced to addition with shift.
if (LeftShiftForReducedMultiply(&mright) == 0) {
Emit(kArm64Msub32, g.DefineAsRegister(node),
g.UseRegister(mright.left().node()),
g.UseRegister(mright.right().node()),
g.UseRegister(m.left().node()));
return;
}
}
if (m.left().Is(0)) {
@@ -909,13 +945,17 @@ void InstructionSelector::VisitInt64Sub(Node* node) {
Arm64OperandGenerator g(this);
Int64BinopMatcher m(node);
// Select Msub(a, x, y) for Sub(a, Mul(x, y)).
// Select Msub(x, y, a) for Sub(a, Mul(x, y)).
if (m.right().IsInt64Mul() && CanCover(node, m.right().node())) {
Int64BinopMatcher mright(m.right().node());
Emit(kArm64Msub, g.DefineAsRegister(node),
g.UseRegister(mright.left().node()),
g.UseRegister(mright.right().node()), g.UseRegister(m.left().node()));
return;
// Check multiply can't be later reduced to addition with shift.
if (LeftShiftForReducedMultiply(&mright) == 0) {
Emit(kArm64Msub, g.DefineAsRegister(node),
g.UseRegister(mright.left().node()),
g.UseRegister(mright.right().node()),
g.UseRegister(m.left().node()));
return;
}
}
if (m.left().Is(0)) {
@@ -930,6 +970,16 @@ void InstructionSelector::VisitInt32Mul(Node* node) {
Arm64OperandGenerator g(this);
Int32BinopMatcher m(node);
// First, try to reduce the multiplication to addition with left shift.
// x * (2^k + 1) -> x + (x << k)
int32_t shift = LeftShiftForReducedMultiply(&m);
if (shift > 0) {
Emit(kArm64Add32 | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
g.DefineAsRegister(node), g.UseRegister(m.left().node()),
g.UseRegister(m.left().node()), g.TempImmediate(shift));
return;
}
if (m.left().IsInt32Sub() && CanCover(node, m.left().node())) {
Int32BinopMatcher mleft(m.left().node());
@@ -954,18 +1004,6 @@ void InstructionSelector::VisitInt32Mul(Node* node) {
}
}
// x * (2^k + 1) -> x + (x << k)
if (m.right().HasValue() && m.right().Value() > 0) {
int32_t value = m.right().Value();
if (base::bits::IsPowerOfTwo32(value - 1)) {
Emit(kArm64Add32 | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
g.DefineAsRegister(node), g.UseRegister(m.left().node()),
g.UseRegister(m.left().node()),
g.TempImmediate(WhichPowerOf2(value - 1)));
return;
}
}
VisitRRR(this, kArm64Mul32, node);
}
@@ -974,6 +1012,16 @@ void InstructionSelector::VisitInt64Mul(Node* node) {
Arm64OperandGenerator g(this);
Int64BinopMatcher m(node);
// First, try to reduce the multiplication to addition with left shift.
// x * (2^k + 1) -> x + (x << k)
int32_t shift = LeftShiftForReducedMultiply(&m);
if (shift > 0) {
Emit(kArm64Add | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
g.DefineAsRegister(node), g.UseRegister(m.left().node()),
g.UseRegister(m.left().node()), g.TempImmediate(shift));
return;
}
if (m.left().IsInt64Sub() && CanCover(node, m.left().node())) {
Int64BinopMatcher mleft(m.left().node());
@@ -997,18 +1045,6 @@ void InstructionSelector::VisitInt64Mul(Node* node) {
}
}
// x * (2^k + 1) -> x + (x << k)
if (m.right().HasValue() && m.right().Value() > 0) {
int64_t value = m.right().Value();
if (base::bits::IsPowerOfTwo64(value - 1)) {
Emit(kArm64Add | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
g.DefineAsRegister(node), g.UseRegister(m.left().node()),
g.UseRegister(m.left().node()),
g.TempImmediate(WhichPowerOf2_64(value - 1)));
return;
}
}
VisitRRR(this, kArm64Mul, node);
}
@@ -1641,6 +1641,102 @@ TEST_F(InstructionSelectorTest, Int32MulWithImmediate) {
EXPECT_EQ(k, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// x * (2^k + 1) + c -> x + (x << k) + c
TRACED_FORRANGE(int32_t, k, 1, 30) {
StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
m.Return(
m.Int32Add(m.Int32Mul(m.Parameter(0), m.Int32Constant((1 << k) + 1)),
m.Parameter(1)));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add32, s[0]->arch_opcode());
EXPECT_EQ(kArm64Add32, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// (2^k + 1) * x + c -> x + (x << k) + c
TRACED_FORRANGE(int32_t, k, 1, 30) {
StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
m.Return(
m.Int32Add(m.Int32Mul(m.Int32Constant((1 << k) + 1), m.Parameter(0)),
m.Parameter(1)));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add32, s[0]->arch_opcode());
EXPECT_EQ(kArm64Add32, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// c + x * (2^k + 1) -> c + x + (x << k)
TRACED_FORRANGE(int32_t, k, 1, 30) {
StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
m.Return(
m.Int32Add(m.Parameter(0),
m.Int32Mul(m.Parameter(1), m.Int32Constant((1 << k) + 1))));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add32, s[0]->arch_opcode());
EXPECT_EQ(kArm64Add32, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// c + (2^k + 1) * x -> c + x + (x << k)
TRACED_FORRANGE(int32_t, k, 1, 30) {
StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
m.Return(
m.Int32Add(m.Parameter(0),
m.Int32Mul(m.Int32Constant((1 << k) + 1), m.Parameter(1))));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add32, s[0]->arch_opcode());
EXPECT_EQ(kArm64Add32, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// c - x * (2^k + 1) -> c - (x + (x << k))
TRACED_FORRANGE(int32_t, k, 1, 30) {
StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
m.Return(
m.Int32Sub(m.Parameter(0),
m.Int32Mul(m.Parameter(1), m.Int32Constant((1 << k) + 1))));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add32, s[0]->arch_opcode());
EXPECT_EQ(kArm64Sub32, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// c - (2^k + 1) * x -> c - (x + (x << k))
TRACED_FORRANGE(int32_t, k, 1, 30) {
StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
m.Return(
m.Int32Sub(m.Parameter(0),
m.Int32Mul(m.Int32Constant((1 << k) + 1), m.Parameter(1))));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add32, s[0]->arch_opcode());
EXPECT_EQ(kArm64Sub32, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
}
@@ -1671,6 +1767,102 @@ TEST_F(InstructionSelectorTest, Int64MulWithImmediate) {
EXPECT_EQ(k, s.ToInt64(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// x * (2^k + 1) + c -> x + (x << k) + c
TRACED_FORRANGE(int64_t, k, 1, 62) {
StreamBuilder m(this, kMachInt64, kMachInt64, kMachInt64);
m.Return(
m.Int64Add(m.Int64Mul(m.Parameter(0), m.Int64Constant((1L << k) + 1)),
m.Parameter(1)));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add, s[0]->arch_opcode());
EXPECT_EQ(kArm64Add, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt64(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// (2^k + 1) * x + c -> x + (x << k) + c
TRACED_FORRANGE(int64_t, k, 1, 62) {
StreamBuilder m(this, kMachInt64, kMachInt64, kMachInt64);
m.Return(
m.Int64Add(m.Int64Mul(m.Int64Constant((1L << k) + 1), m.Parameter(0)),
m.Parameter(1)));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add, s[0]->arch_opcode());
EXPECT_EQ(kArm64Add, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt64(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// c + x * (2^k + 1) -> c + x + (x << k)
TRACED_FORRANGE(int64_t, k, 1, 62) {
StreamBuilder m(this, kMachInt64, kMachInt64, kMachInt64);
m.Return(
m.Int64Add(m.Parameter(0),
m.Int64Mul(m.Parameter(1), m.Int64Constant((1L << k) + 1))));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add, s[0]->arch_opcode());
EXPECT_EQ(kArm64Add, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt64(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// c + (2^k + 1) * x -> c + x + (x << k)
TRACED_FORRANGE(int64_t, k, 1, 62) {
StreamBuilder m(this, kMachInt64, kMachInt64, kMachInt64);
m.Return(
m.Int64Add(m.Parameter(0),
m.Int64Mul(m.Int64Constant((1L << k) + 1), m.Parameter(1))));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add, s[0]->arch_opcode());
EXPECT_EQ(kArm64Add, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt64(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// c - x * (2^k + 1) -> c - (x + (x << k))
TRACED_FORRANGE(int64_t, k, 1, 62) {
StreamBuilder m(this, kMachInt64, kMachInt64, kMachInt64);
m.Return(
m.Int64Sub(m.Parameter(0),
m.Int64Mul(m.Parameter(1), m.Int64Constant((1L << k) + 1))));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add, s[0]->arch_opcode());
EXPECT_EQ(kArm64Sub, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt64(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
// c - (2^k + 1) * x -> c - (x + (x << k))
TRACED_FORRANGE(int64_t, k, 1, 62) {
StreamBuilder m(this, kMachInt64, kMachInt64, kMachInt64);
m.Return(
m.Int64Sub(m.Parameter(0),
m.Int64Mul(m.Int64Constant((1L << k) + 1), m.Parameter(1))));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
EXPECT_EQ(kArm64Add, s[0]->arch_opcode());
EXPECT_EQ(kArm64Sub, s[1]->arch_opcode());
EXPECT_EQ(kMode_Operand2_R_LSL_I, s[0]->addressing_mode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(k, s.ToInt64(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
}
}