Commit 357e0143, authored by martyn.capewell, committed by Commit bot

Reland of [turbofan] ARM: Improve AND instruction selection

Improve instruction selector for mask and shift operations by using cheaper
instructions where possible, in preference to UBFX.

Reverted because it was suspected of causing a couple of flaky tests to fail,
but investigation suggests this is unlikely.

Original review: https://codereview.chromium.org/1677023002

BUG=

Review URL: https://codereview.chromium.org/1684073006

Cr-Commit-Position: refs/heads/master@{#33988}
parent f92a5631
......@@ -551,43 +551,67 @@ void InstructionSelector::VisitWord32And(Node* node) {
if (m.right().HasValue()) {
uint32_t const value = m.right().Value();
uint32_t width = base::bits::CountPopulation32(value);
uint32_t msb = base::bits::CountLeadingZeros32(value);
// Try to interpret this AND as UBFX.
if (IsSupported(ARMv7) && width != 0 && msb + width == 32) {
DCHECK_EQ(0u, base::bits::CountTrailingZeros32(value));
uint32_t leading_zeros = base::bits::CountLeadingZeros32(value);
// Try to merge SHR operations on the left hand input into this AND.
if (m.left().IsWord32Shr()) {
Int32BinopMatcher mleft(m.left().node());
if (mleft.right().IsInRange(0, 31)) {
Int32BinopMatcher mshr(m.left().node());
if (mshr.right().HasValue()) {
uint32_t const shift = mshr.right().Value();
if (((shift == 8) || (shift == 16) || (shift == 24)) &&
((value == 0xff) || (value == 0xffff))) {
// Merge SHR into AND by emitting a UXTB or UXTH instruction with a
// bytewise rotation.
Emit((value == 0xff) ? kArmUxtb : kArmUxth,
g.DefineAsRegister(m.node()), g.UseRegister(mshr.left().node()),
g.TempImmediate(mshr.right().Value()));
return;
} else if (IsSupported(ARMv7) && (width != 0) &&
((leading_zeros + width) == 32)) {
// Merge Shr into And by emitting a UBFX instruction.
DCHECK_EQ(0u, base::bits::CountTrailingZeros32(value));
if ((1 <= shift) && (shift <= 31)) {
// UBFX cannot extract bits past the register size, however since
// shifting the original value would have introduced some zeros we can
// still use UBFX with a smaller mask and the remaining bits will be
// zeros.
uint32_t const lsb = mleft.right().Value();
return EmitUbfx(this, node, mleft.left().node(), lsb,
std::min(width, 32 - lsb));
// shifting the original value would have introduced some zeros we
// can still use UBFX with a smaller mask and the remaining bits
// will be zeros.
EmitUbfx(this, node, mshr.left().node(), shift,
std::min(width, 32 - shift));
return;
}
}
}
return EmitUbfx(this, node, m.left().node(), 0, width);
} else if (value == 0xffff) {
// Emit UXTH for this AND. We don't bother testing for UXTB, as it's no
// better than AND 0xff for this operation.
Emit(kArmUxth, g.DefineAsRegister(m.node()),
g.UseRegister(m.left().node()), g.TempImmediate(0));
return;
}
// Try to interpret this AND as BIC.
if (g.CanBeImmediate(~value)) {
// Emit BIC for this AND by inverting the immediate value first.
Emit(kArmBic | AddressingModeField::encode(kMode_Operand2_I),
g.DefineAsRegister(node), g.UseRegister(m.left().node()),
g.TempImmediate(~value));
return;
}
// Try to interpret this AND as UXTH.
if (value == 0xffff) {
Emit(kArmUxth, g.DefineAsRegister(m.node()),
g.UseRegister(m.left().node()), g.TempImmediate(0));
if (!g.CanBeImmediate(value) && IsSupported(ARMv7)) {
// If value has 9 to 23 contiguous set bits, and has the lsb set, we can
// replace this AND with UBFX. Other contiguous bit patterns have already
// been handled by BIC or will be handled by AND.
if ((width != 0) && ((leading_zeros + width) == 32) &&
(9 <= leading_zeros) && (leading_zeros <= 23)) {
DCHECK_EQ(0u, base::bits::CountTrailingZeros32(value));
EmitUbfx(this, node, m.left().node(), 0, width);
return;
}
// Try to interpret this AND as BFC.
if (IsSupported(ARMv7)) {
width = 32 - width;
msb = base::bits::CountLeadingZeros32(~value);
leading_zeros = base::bits::CountLeadingZeros32(~value);
uint32_t lsb = base::bits::CountTrailingZeros32(~value);
if (msb + width + lsb == 32) {
if ((leading_zeros + width + lsb) == 32) {
// This AND can be replaced with BFC.
Emit(kArmBfc, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
g.TempImmediate(lsb), g.TempImmediate(width));
return;
......
......@@ -2545,7 +2545,8 @@ TEST_F(InstructionSelectorTest, Uint32ModWithParametersForSUDIVAndMLS) {
TEST_F(InstructionSelectorTest, Word32AndWithUbfxImmediateForARMv7) {
TRACED_FORRANGE(int32_t, width, 1, 32) {
TRACED_FORRANGE(int32_t, width, 9, 23) {
if (width == 16) continue; // Uxth.
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
m.Return(m.Word32And(m.Parameter(0),
m.Int32Constant(0xffffffffu >> (32 - width))));
......@@ -2556,7 +2557,8 @@ TEST_F(InstructionSelectorTest, Word32AndWithUbfxImmediateForARMv7) {
EXPECT_EQ(0, s.ToInt32(s[0]->InputAt(1)));
EXPECT_EQ(width, s.ToInt32(s[0]->InputAt(2)));
}
TRACED_FORRANGE(int32_t, width, 1, 32) {
TRACED_FORRANGE(int32_t, width, 9, 23) {
if (width == 16) continue; // Uxth.
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
m.Return(m.Word32And(m.Int32Constant(0xffffffffu >> (32 - width)),
m.Parameter(0)));
......@@ -2572,7 +2574,7 @@ TEST_F(InstructionSelectorTest, Word32AndWithUbfxImmediateForARMv7) {
TEST_F(InstructionSelectorTest, Word32AndWithBfcImmediateForARMv7) {
TRACED_FORRANGE(int32_t, lsb, 0, 31) {
TRACED_FORRANGE(int32_t, width, 9, (32 - lsb) - 1) {
TRACED_FORRANGE(int32_t, width, 9, (24 - lsb) - 1) {
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
m.Return(m.Word32And(
m.Parameter(0),
......@@ -2589,7 +2591,7 @@ TEST_F(InstructionSelectorTest, Word32AndWithBfcImmediateForARMv7) {
}
}
TRACED_FORRANGE(int32_t, lsb, 0, 31) {
TRACED_FORRANGE(int32_t, width, 9, (32 - lsb) - 1) {
TRACED_FORRANGE(int32_t, width, 9, (24 - lsb) - 1) {
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
m.Return(
m.Word32And(m.Int32Constant(~((0xffffffffu >> (32 - width)) << lsb)),
......@@ -2828,8 +2830,11 @@ TEST_F(InstructionSelectorTest, Word32NotWithParameter) {
TEST_F(InstructionSelectorTest, Word32AndWithWord32ShrWithImmediateForARMv7) {
TRACED_FORRANGE(int32_t, lsb, 0, 31) {
TRACED_FORRANGE(int32_t, lsb, 1, 31) {
TRACED_FORRANGE(int32_t, width, 1, 32 - lsb) {
if (((width == 8) || (width == 16)) &&
((lsb == 8) || (lsb == 16) || (lsb == 24)))
continue; // Uxtb/h ror.
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
m.Return(m.Word32And(m.Word32Shr(m.Parameter(0), m.Int32Constant(lsb)),
m.Int32Constant(0xffffffffu >> (32 - width))));
......@@ -2841,8 +2846,11 @@ TEST_F(InstructionSelectorTest, Word32AndWithWord32ShrWithImmediateForARMv7) {
EXPECT_EQ(width, s.ToInt32(s[0]->InputAt(2)));
}
}
TRACED_FORRANGE(int32_t, lsb, 0, 31) {
TRACED_FORRANGE(int32_t, lsb, 1, 31) {
TRACED_FORRANGE(int32_t, width, 1, 32 - lsb) {
if (((width == 8) || (width == 16)) &&
((lsb == 8) || (lsb == 16) || (lsb == 24)))
continue; // Uxtb/h ror.
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
m.Return(m.Word32And(m.Int32Constant(0xffffffffu >> (32 - width)),
m.Word32Shr(m.Parameter(0), m.Int32Constant(lsb))));
......@@ -2857,6 +2865,62 @@ TEST_F(InstructionSelectorTest, Word32AndWithWord32ShrWithImmediateForARMv7) {
}
// Checks that a byte mask applied to a logical right shift by 8, 16 or 24 is
// selected as a single UXTB whose second input is the shift amount, for both
// operand orders of the AND.
TEST_F(InstructionSelectorTest, Word32AndWithWord32ShrAnd0xff) {
  // Mask as the right-hand operand: (p0 >>> shift) & 0xff.
  TRACED_FORRANGE(int32_t, shr, 1, 3) {
    const int32_t shift = shr * 8;
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    Node* const input = m.Parameter(0);
    Node* const shifted = m.Word32Shr(input, m.Int32Constant(shift));
    Node* const masked = m.Word32And(shifted, m.Int32Constant(0xff));
    m.Return(masked);
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArmUxtb, s[0]->arch_opcode());
    ASSERT_EQ(2U, s[0]->InputCount());
    EXPECT_EQ(shift, s.ToInt32(s[0]->InputAt(1)));
  }
  // Mask as the left-hand operand: 0xff & (p0 >>> shift).
  TRACED_FORRANGE(int32_t, shr, 1, 3) {
    const int32_t shift = shr * 8;
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    Node* const input = m.Parameter(0);
    Node* const mask = m.Int32Constant(0xff);
    Node* const masked =
        m.Word32And(mask, m.Word32Shr(input, m.Int32Constant(shift)));
    m.Return(masked);
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArmUxtb, s[0]->arch_opcode());
    ASSERT_EQ(2U, s[0]->InputCount());
    EXPECT_EQ(shift, s.ToInt32(s[0]->InputAt(1)));
  }
}
// Checks that a halfword mask applied to a logical right shift by 8 or 16 is
// selected as a single UXTH whose second input is the shift amount, for both
// operand orders of the AND.
//
// A shift of 24 is deliberately not covered here. UXTH with ROR #24 produces
// (x >> 24) | ((x & 0xff) << 8), because the rotation wraps the low byte of
// the source into bits 8..15, whereas (x >> 24) & 0xffff is just (x >> 24).
// Expecting UXTH for that shift would pin incorrect code generation.
TEST_F(InstructionSelectorTest, Word32AndWithWord32ShrAnd0xffff) {
  // Mask as the right-hand operand: (p0 >>> shift) & 0xffff.
  TRACED_FORRANGE(int32_t, shr, 1, 2) {
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    Node* const p0 = m.Parameter(0);
    Node* const r = m.Word32And(m.Word32Shr(p0, m.Int32Constant(shr * 8)),
                                m.Int32Constant(0xffff));
    m.Return(r);
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArmUxth, s[0]->arch_opcode());
    ASSERT_EQ(2U, s[0]->InputCount());
    EXPECT_EQ(shr * 8, s.ToInt32(s[0]->InputAt(1)));
  }
  // Mask as the left-hand operand: 0xffff & (p0 >>> shift).
  TRACED_FORRANGE(int32_t, shr, 1, 2) {
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    Node* const p0 = m.Parameter(0);
    Node* const r = m.Word32And(m.Int32Constant(0xffff),
                                m.Word32Shr(p0, m.Int32Constant(shr * 8)));
    m.Return(r);
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArmUxth, s[0]->arch_opcode());
    ASSERT_EQ(2U, s[0]->InputCount());
    EXPECT_EQ(shr * 8, s.ToInt32(s[0]->InputAt(1)));
  }
}
TEST_F(InstructionSelectorTest, Word32Clz) {
StreamBuilder m(this, MachineType::Uint32(), MachineType::Uint32());
Node* const p0 = m.Parameter(0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment