Commit abcd1835 authored by Fanchen Kong, committed by Commit Bot

[turbofan] Enable complex memory operands for floating-point binop on x64

With this change, a load from memory into a register can be replaced by a memory operand for floating point binops if possible.

This eliminates one instruction for the following pattern:
	vmovss xmm0, m32
	vmulss xmm1, xmm1, xmm0
===>
	vmulss xmm1, xmm1, m32

Change-Id: I6944287fae3b7756621fb6b3d0b3db9e0beaf080
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2411696
Commit-Queue: Fanchen Kong <fanchen.kong@intel.com>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70255}
parent 371b1a61
......@@ -487,13 +487,19 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
} \
} while (false)
#define ASSEMBLE_SSE_BINOP(asm_instr) \
do { \
if (instr->InputAt(1)->IsFPRegister()) { \
__ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
} else { \
__ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
} \
#define ASSEMBLE_SSE_BINOP(asm_instr) \
do { \
if (HasAddressingMode(instr)) { \
size_t index = 1; \
Operand right = i.MemoryOperand(&index); \
__ asm_instr(i.InputDoubleRegister(0), right); \
} else { \
if (instr->InputAt(1)->IsFPRegister()) { \
__ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
} else { \
__ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
} \
} \
} while (false)
#define ASSEMBLE_SSE_UNOP(asm_instr) \
......@@ -505,16 +511,22 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
} \
} while (false)
#define ASSEMBLE_AVX_BINOP(asm_instr) \
do { \
CpuFeatureScope avx_scope(tasm(), AVX); \
if (instr->InputAt(1)->IsFPRegister()) { \
__ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
i.InputDoubleRegister(1)); \
} else { \
__ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
i.InputOperand(1)); \
} \
#define ASSEMBLE_AVX_BINOP(asm_instr) \
do { \
CpuFeatureScope avx_scope(tasm(), AVX); \
if (HasAddressingMode(instr)) { \
size_t index = 1; \
Operand right = i.MemoryOperand(&index); \
__ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), right); \
} else { \
if (instr->InputAt(1)->IsFPRegister()) { \
__ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
i.InputDoubleRegister(1)); \
} else { \
__ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
i.InputOperand(1)); \
} \
} \
} while (false)
#define ASSEMBLE_IEEE754_BINOP(name) \
......
......@@ -90,6 +90,16 @@ class X64OperandGenerator final : public OperandGenerator {
return rep == MachineRepresentation::kWord32 ||
(COMPRESS_POINTERS_BOOL &&
(IsAnyTagged(rep) || IsAnyCompressed(rep)));
case kAVXFloat64Add:
case kAVXFloat64Sub:
case kAVXFloat64Mul:
DCHECK_EQ(MachineRepresentation::kFloat64, rep);
return true;
case kAVXFloat32Add:
case kAVXFloat32Sub:
case kAVXFloat32Mul:
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
return true;
case kX64Cmp16:
case kX64Test16:
return rep == MachineRepresentation::kWord16;
......@@ -1401,14 +1411,60 @@ void VisitRRO(InstructionSelector* selector, Node* node,
}
void VisitFloatBinop(InstructionSelector* selector, Node* node,
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
InstructionCode avx_opcode, InstructionCode sse_opcode) {
X64OperandGenerator g(selector);
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
InstructionOperand operand1 = g.Use(node->InputAt(1));
Node* left = node->InputAt(0);
Node* right = node->InputAt(1);
InstructionOperand inputs[8];
size_t input_count = 0;
InstructionOperand outputs[1];
size_t output_count = 0;
if (left == right) {
// If both inputs refer to the same operand, enforce allocating a register
// for both of them to ensure that we don't end up generating code like
// this:
//
// movss rax, [rbp-0x10]
// addss rax, [rbp-0x10]
// jo label
InstructionOperand const input = g.UseRegister(left);
inputs[input_count++] = input;
inputs[input_count++] = input;
} else {
int effect_level = selector->GetEffectLevel(node);
if (node->op()->HasProperty(Operator::kCommutative) &&
(g.CanBeBetterLeftOperand(right) ||
g.CanBeMemoryOperand(avx_opcode, node, left, effect_level)) &&
(!g.CanBeBetterLeftOperand(left) ||
!g.CanBeMemoryOperand(avx_opcode, node, right, effect_level))) {
std::swap(left, right);
}
if (g.CanBeMemoryOperand(avx_opcode, node, right, effect_level)) {
inputs[input_count++] = g.UseRegister(left);
AddressingMode addressing_mode =
g.GetEffectiveAddressMemoryOperand(right, inputs, &input_count);
avx_opcode |= AddressingModeField::encode(addressing_mode);
sse_opcode |= AddressingModeField::encode(addressing_mode);
} else {
inputs[input_count++] = g.UseRegister(left);
inputs[input_count++] = g.Use(right);
}
}
DCHECK_NE(0u, input_count);
DCHECK_GE(arraysize(inputs), input_count);
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1);
outputs[output_count++] = g.DefineAsRegister(node);
DCHECK_EQ(1u, output_count);
DCHECK_GE(arraysize(outputs), output_count);
selector->Emit(avx_opcode, output_count, outputs, input_count, inputs);
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
outputs[output_count++] = g.DefineSameAsFirst(node);
DCHECK_EQ(1u, output_count);
DCHECK_GE(arraysize(outputs), output_count);
selector->Emit(sse_opcode, output_count, outputs, input_count, inputs);
}
}
......
......@@ -1587,6 +1587,112 @@ TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
}
}
TEST_F(InstructionSelectorTest, Float32BinopArithmeticWithLoad) {
{
StreamBuilder m(this, MachineType::Float32(), MachineType::Float32(),
MachineType::Int64(), MachineType::Int64());
Node* const p0 = m.Parameter(0);
Node* const p1 = m.Parameter(1);
Node* const p2 = m.Parameter(2);
Node* add = m.Float32Add(
p0, m.Load(MachineType::Float32(), p1, m.Int32Constant(127)));
Node* sub = m.Float32Sub(
add, m.Load(MachineType::Float32(), p1, m.Int32Constant(127)));
Node* ret = m.Float32Mul(
m.Load(MachineType::Float32(), p2, m.Int32Constant(127)), sub);
m.Return(ret);
Stream s = m.Build(AVX);
ASSERT_EQ(3U, s.size());
EXPECT_EQ(kAVXFloat32Add, s[0]->arch_opcode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(kAVXFloat32Sub, s[1]->arch_opcode());
ASSERT_EQ(3U, s[1]->InputCount());
EXPECT_EQ(kAVXFloat32Mul, s[2]->arch_opcode());
ASSERT_EQ(3U, s[2]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(s.ToVreg(p2), s.ToVreg(s[2]->InputAt(1)));
}
{
StreamBuilder m(this, MachineType::Float32(), MachineType::Float32(),
MachineType::Int64(), MachineType::Int64());
Node* const p0 = m.Parameter(0);
Node* const p1 = m.Parameter(1);
Node* const p2 = m.Parameter(2);
Node* add = m.Float32Add(
p0, m.Load(MachineType::Float32(), p1, m.Int32Constant(127)));
Node* sub = m.Float32Sub(
add, m.Load(MachineType::Float32(), p1, m.Int32Constant(127)));
Node* ret = m.Float32Mul(
m.Load(MachineType::Float32(), p2, m.Int32Constant(127)), sub);
m.Return(ret);
Stream s = m.Build();
ASSERT_EQ(3U, s.size());
EXPECT_EQ(kSSEFloat32Add, s[0]->arch_opcode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(kSSEFloat32Sub, s[1]->arch_opcode());
ASSERT_EQ(3U, s[1]->InputCount());
EXPECT_EQ(kSSEFloat32Mul, s[2]->arch_opcode());
ASSERT_EQ(3U, s[2]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(s.ToVreg(p2), s.ToVreg(s[2]->InputAt(1)));
}
}
TEST_F(InstructionSelectorTest, Float64BinopArithmeticWithLoad) {
{
StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(),
MachineType::Int64(), MachineType::Int64());
Node* const p0 = m.Parameter(0);
Node* const p1 = m.Parameter(1);
Node* const p2 = m.Parameter(2);
Node* add = m.Float64Add(
p0, m.Load(MachineType::Float64(), p1, m.Int32Constant(127)));
Node* sub = m.Float64Sub(
add, m.Load(MachineType::Float64(), p1, m.Int32Constant(127)));
Node* ret = m.Float64Mul(
m.Load(MachineType::Float64(), p2, m.Int32Constant(127)), sub);
m.Return(ret);
Stream s = m.Build(AVX);
ASSERT_EQ(3U, s.size());
EXPECT_EQ(kAVXFloat64Add, s[0]->arch_opcode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(kAVXFloat64Sub, s[1]->arch_opcode());
ASSERT_EQ(3U, s[1]->InputCount());
EXPECT_EQ(kAVXFloat64Mul, s[2]->arch_opcode());
ASSERT_EQ(3U, s[2]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(s.ToVreg(p2), s.ToVreg(s[2]->InputAt(1)));
}
{
StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(),
MachineType::Int64(), MachineType::Int64());
Node* const p0 = m.Parameter(0);
Node* const p1 = m.Parameter(1);
Node* const p2 = m.Parameter(2);
Node* add = m.Float64Add(
p0, m.Load(MachineType::Float64(), p1, m.Int32Constant(127)));
Node* sub = m.Float64Sub(
add, m.Load(MachineType::Float64(), p1, m.Int32Constant(127)));
Node* ret = m.Float64Mul(
m.Load(MachineType::Float64(), p2, m.Int32Constant(127)), sub);
m.Return(ret);
Stream s = m.Build();
ASSERT_EQ(3U, s.size());
EXPECT_EQ(kSSEFloat64Add, s[0]->arch_opcode());
ASSERT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(kSSEFloat64Sub, s[1]->arch_opcode());
ASSERT_EQ(3U, s[1]->InputCount());
EXPECT_EQ(kSSEFloat64Mul, s[2]->arch_opcode());
ASSERT_EQ(3U, s[2]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(s.ToVreg(p2), s.ToVreg(s[2]->InputAt(1)));
}
}
// -----------------------------------------------------------------------------
// Miscellaneous.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment