Commit 5d38a300 authored by Ng Zhi An, committed by V8 LUCI CQ

[ia32] Merge SSE/AVX float32/float64 abs neg

This removes 4 arch opcodes.

Bug: v8:11217
Change-Id: Idff04fb205c7d7d1577ce123cc2160d678dfe39a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3114599
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76473}
parent c6c26299
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "src/base/overflowing-math.h" #include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h" #include "src/codegen/assembler-inl.h"
#include "src/codegen/callable.h" #include "src/codegen/callable.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/ia32/assembler-ia32.h" #include "src/codegen/ia32/assembler-ia32.h"
#include "src/codegen/ia32/register-ia32.h" #include "src/codegen/ia32/register-ia32.h"
#include "src/codegen/macro-assembler.h" #include "src/codegen/macro-assembler.h"
...@@ -1260,22 +1261,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1260,22 +1261,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEFloat32Sqrt: case kSSEFloat32Sqrt:
__ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0)); __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
break; break;
case kSSEFloat32Abs: {
// TODO(bmeurer): Use 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psrlq(tmp, 33);
__ andps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat32Neg: {
// TODO(bmeurer): Use 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psllq(tmp, 31);
__ xorps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat32Round: { case kSSEFloat32Round: {
CpuFeatureScope sse_scope(tasm(), SSE4_1); CpuFeatureScope sse_scope(tasm(), SSE4_1);
RoundingMode const mode = RoundingMode const mode =
...@@ -1425,22 +1410,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1425,22 +1410,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(esp, tmp); __ mov(esp, tmp);
break; break;
} }
case kSSEFloat64Abs: {
// TODO(bmeurer): Use 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psrlq(tmp, 1);
__ andps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat64Neg: {
// TODO(bmeurer): Use 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psllq(tmp, 63);
__ xorps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat64Sqrt: case kSSEFloat64Sqrt:
__ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0)); __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
break; break;
...@@ -1554,40 +1523,60 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1554,40 +1523,60 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break; break;
} }
case kAVXFloat32Abs: { case kFloat32Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants. // TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0); __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pcmpeqd(tmp, tmp); __ Psrlq(kScratchDoubleReg, byte{33});
__ psrlq(tmp, 33); if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX); CpuFeatureScope avx_scope(tasm(), AVX);
__ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
break; break;
} }
case kAVXFloat32Neg: { case kFloat32Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants. // TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0); __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pcmpeqd(tmp, tmp); __ Psllq(kScratchDoubleReg, byte{31});
__ psllq(tmp, 31); if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX); CpuFeatureScope avx_scope(tasm(), AVX);
__ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
break; break;
} }
case kAVXFloat64Abs: { case kFloat64Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants. // TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0); __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pcmpeqd(tmp, tmp); __ Psrlq(kScratchDoubleReg, byte{1});
__ psrlq(tmp, 1); if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX); CpuFeatureScope avx_scope(tasm(), AVX);
__ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
break; break;
} }
case kAVXFloat64Neg: { case kFloat64Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants. // TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0); __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pcmpeqd(tmp, tmp); __ Psllq(kScratchDoubleReg, byte{63});
__ psllq(tmp, 63); if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX); CpuFeatureScope avx_scope(tasm(), AVX);
__ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
break; break;
} }
case kSSEFloat64SilenceNaN: case kSSEFloat64SilenceNaN:
......
...@@ -48,8 +48,6 @@ namespace compiler { ...@@ -48,8 +48,6 @@ namespace compiler {
V(IA32MFence) \ V(IA32MFence) \
V(IA32LFence) \ V(IA32LFence) \
V(SSEFloat32Cmp) \ V(SSEFloat32Cmp) \
V(SSEFloat32Abs) \
V(SSEFloat32Neg) \
V(SSEFloat32Sqrt) \ V(SSEFloat32Sqrt) \
V(SSEFloat32Round) \ V(SSEFloat32Round) \
V(SSEFloat64Cmp) \ V(SSEFloat64Cmp) \
...@@ -58,8 +56,6 @@ namespace compiler { ...@@ -58,8 +56,6 @@ namespace compiler {
V(SSEFloat64Max) \ V(SSEFloat64Max) \
V(SSEFloat32Min) \ V(SSEFloat32Min) \
V(SSEFloat64Min) \ V(SSEFloat64Min) \
V(SSEFloat64Abs) \
V(SSEFloat64Neg) \
V(SSEFloat64Sqrt) \ V(SSEFloat64Sqrt) \
V(SSEFloat64Round) \ V(SSEFloat64Round) \
V(SSEFloat32ToFloat64) \ V(SSEFloat32ToFloat64) \
...@@ -86,10 +82,10 @@ namespace compiler { ...@@ -86,10 +82,10 @@ namespace compiler {
V(Float32Div) \ V(Float32Div) \
V(Float64Mul) \ V(Float64Mul) \
V(Float64Div) \ V(Float64Div) \
V(AVXFloat64Abs) \ V(Float64Abs) \
V(AVXFloat64Neg) \ V(Float64Neg) \
V(AVXFloat32Abs) \ V(Float32Abs) \
V(AVXFloat32Neg) \ V(Float32Neg) \
V(IA32Movsxbl) \ V(IA32Movsxbl) \
V(IA32Movzxbl) \ V(IA32Movzxbl) \
V(IA32Movb) \ V(IA32Movb) \
......
...@@ -49,8 +49,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -49,8 +49,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Bswap: case kIA32Bswap:
case kIA32Lea: case kIA32Lea:
case kSSEFloat32Cmp: case kSSEFloat32Cmp:
case kSSEFloat32Abs:
case kSSEFloat32Neg:
case kSSEFloat32Sqrt: case kSSEFloat32Sqrt:
case kSSEFloat32Round: case kSSEFloat32Round:
case kSSEFloat64Cmp: case kSSEFloat64Cmp:
...@@ -59,8 +57,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -59,8 +57,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEFloat64Max: case kSSEFloat64Max:
case kSSEFloat32Min: case kSSEFloat32Min:
case kSSEFloat64Min: case kSSEFloat64Min:
case kSSEFloat64Abs:
case kSSEFloat64Neg:
case kSSEFloat64Sqrt: case kSSEFloat64Sqrt:
case kSSEFloat64Round: case kSSEFloat64Round:
case kSSEFloat32ToFloat64: case kSSEFloat32ToFloat64:
...@@ -87,10 +83,10 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -87,10 +83,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kFloat32Div: case kFloat32Div:
case kFloat64Mul: case kFloat64Mul:
case kFloat64Div: case kFloat64Div:
case kAVXFloat64Abs: case kFloat64Abs:
case kAVXFloat64Neg: case kFloat64Neg:
case kAVXFloat32Abs: case kFloat32Abs:
case kAVXFloat32Neg: case kFloat32Neg:
case kIA32BitcastFI: case kIA32BitcastFI:
case kIA32BitcastIF: case kIA32BitcastIF:
case kIA32F64x2Splat: case kIA32F64x2Splat:
...@@ -452,12 +448,12 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) { ...@@ -452,12 +448,12 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kFloat32Sub: case kFloat32Sub:
case kFloat64Add: case kFloat64Add:
case kFloat64Sub: case kFloat64Sub:
case kSSEFloat32Abs: case kFloat32Abs:
case kSSEFloat32Neg: case kFloat32Neg:
case kSSEFloat64Max: case kSSEFloat64Max:
case kSSEFloat64Min: case kSSEFloat64Min:
case kSSEFloat64Abs: case kFloat64Abs:
case kSSEFloat64Neg: case kFloat64Neg:
return 5; return 5;
case kFloat32Mul: case kFloat32Mul:
return 4; return 4;
......
...@@ -327,15 +327,12 @@ void VisitRROFloat(InstructionSelector* selector, Node* node, ...@@ -327,15 +327,12 @@ void VisitRROFloat(InstructionSelector* selector, Node* node,
} }
void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input, void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
ArchOpcode avx_opcode, ArchOpcode sse_opcode) { ArchOpcode opcode) {
IA32OperandGenerator g(selector); IA32OperandGenerator g(selector);
InstructionOperand temps[] = {g.TempSimd128Register()};
if (selector->IsSupported(AVX)) { if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input), selector->Emit(opcode, g.DefineAsRegister(node), g.Use(input));
arraysize(temps), temps);
} else { } else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(input));
g.UseUniqueRegister(input), arraysize(temps), temps);
} }
} }
...@@ -1194,12 +1191,12 @@ void InstructionSelector::VisitWord32Ror(Node* node) { ...@@ -1194,12 +1191,12 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(F64x2Le, kIA32F64x2Le) V(F64x2Le, kIA32F64x2Le)
#define FLOAT_UNOP_LIST(V) \ #define FLOAT_UNOP_LIST(V) \
V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \ V(Float32Abs, kFloat32Abs) \
V(Float64Abs, kAVXFloat64Abs, kSSEFloat64Abs) \ V(Float64Abs, kFloat64Abs) \
V(Float32Neg, kAVXFloat32Neg, kSSEFloat32Neg) \ V(Float32Neg, kFloat32Neg) \
V(Float64Neg, kAVXFloat64Neg, kSSEFloat64Neg) \ V(Float64Neg, kFloat64Neg) \
V(F64x2Abs, kAVXFloat64Abs, kSSEFloat64Abs) \ V(F64x2Abs, kFloat64Abs) \
V(F64x2Neg, kAVXFloat64Neg, kSSEFloat64Neg) V(F64x2Neg, kFloat64Neg)
#define RO_VISITOR(Name, opcode) \ #define RO_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \ void InstructionSelector::Visit##Name(Node* node) { \
...@@ -1241,9 +1238,9 @@ RRO_FLOAT_OP_LIST(RRO_FLOAT_VISITOR) ...@@ -1241,9 +1238,9 @@ RRO_FLOAT_OP_LIST(RRO_FLOAT_VISITOR)
#undef RRO_FLOAT_VISITOR #undef RRO_FLOAT_VISITOR
#undef RRO_FLOAT_OP_LIST #undef RRO_FLOAT_OP_LIST
#define FLOAT_UNOP_VISITOR(Name, avx, sse) \ #define FLOAT_UNOP_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \ void InstructionSelector::Visit##Name(Node* node) { \
VisitFloatUnop(this, node, node->InputAt(0), avx, sse); \ VisitFloatUnop(this, node, node->InputAt(0), opcode); \
} }
FLOAT_UNOP_LIST(FLOAT_UNOP_VISITOR) FLOAT_UNOP_LIST(FLOAT_UNOP_VISITOR)
#undef FLOAT_UNOP_VISITOR #undef FLOAT_UNOP_VISITOR
......
...@@ -743,7 +743,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) { ...@@ -743,7 +743,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) {
m.Return(n); m.Return(n);
Stream s = m.Build(); Stream s = m.Build();
ASSERT_EQ(1U, s.size()); ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat32Abs, s[0]->arch_opcode()); EXPECT_EQ(kFloat32Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount()); ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0))); EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount()); ASSERT_EQ(1U, s[0]->OutputCount());
...@@ -758,7 +758,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) { ...@@ -758,7 +758,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) {
m.Return(n); m.Return(n);
Stream s = m.Build(AVX); Stream s = m.Build(AVX);
ASSERT_EQ(1U, s.size()); ASSERT_EQ(1U, s.size());
EXPECT_EQ(kAVXFloat32Abs, s[0]->arch_opcode()); EXPECT_EQ(kFloat32Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount()); ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0))); EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount()); ASSERT_EQ(1U, s[0]->OutputCount());
...@@ -776,7 +776,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) { ...@@ -776,7 +776,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
m.Return(n); m.Return(n);
Stream s = m.Build(); Stream s = m.Build();
ASSERT_EQ(1U, s.size()); ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat64Abs, s[0]->arch_opcode()); EXPECT_EQ(kFloat64Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount()); ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0))); EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount()); ASSERT_EQ(1U, s[0]->OutputCount());
...@@ -791,7 +791,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) { ...@@ -791,7 +791,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
m.Return(n); m.Return(n);
Stream s = m.Build(AVX); Stream s = m.Build(AVX);
ASSERT_EQ(1U, s.size()); ASSERT_EQ(1U, s.size());
EXPECT_EQ(kAVXFloat64Abs, s[0]->arch_opcode()); EXPECT_EQ(kFloat64Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount()); ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0))); EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount()); ASSERT_EQ(1U, s[0]->OutputCount());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment