Commit 5d38a300 authored by Ng Zhi An, committed by V8 LUCI CQ

[ia32] Merge SSE/AVX float32/float64 abs neg

This removes 4 arch opcodes.

Bug: v8:11217
Change-Id: Idff04fb205c7d7d1577ce123cc2160d678dfe39a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3114599
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76473}
parent c6c26299
......@@ -5,6 +5,7 @@
#include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/callable.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/ia32/assembler-ia32.h"
#include "src/codegen/ia32/register-ia32.h"
#include "src/codegen/macro-assembler.h"
......@@ -1260,22 +1261,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEFloat32Sqrt:
__ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEFloat32Abs: {
// TODO(bmeurer): Use 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psrlq(tmp, 33);
__ andps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat32Neg: {
// TODO(bmeurer): Use 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psllq(tmp, 31);
__ xorps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat32Round: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
RoundingMode const mode =
......@@ -1425,22 +1410,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(esp, tmp);
break;
}
case kSSEFloat64Abs: {
// TODO(bmeurer): Use 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psrlq(tmp, 1);
__ andps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat64Neg: {
// TODO(bmeurer): Use 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psllq(tmp, 63);
__ xorps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat64Sqrt:
__ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
break;
......@@ -1554,40 +1523,60 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
}
case kAVXFloat32Abs: {
case kFloat32Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psrlq(tmp, 33);
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psrlq(kScratchDoubleReg, byte{33});
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
__ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
break;
}
case kAVXFloat32Neg: {
case kFloat32Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psllq(tmp, 31);
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psllq(kScratchDoubleReg, byte{31});
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
__ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
break;
}
case kAVXFloat64Abs: {
case kFloat64Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psrlq(tmp, 1);
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psrlq(kScratchDoubleReg, byte{1});
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
__ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
break;
}
case kAVXFloat64Neg: {
case kFloat64Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqd(tmp, tmp);
__ psllq(tmp, 63);
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psllq(kScratchDoubleReg, byte{63});
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
__ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
break;
}
case kSSEFloat64SilenceNaN:
......
......@@ -48,8 +48,6 @@ namespace compiler {
V(IA32MFence) \
V(IA32LFence) \
V(SSEFloat32Cmp) \
V(SSEFloat32Abs) \
V(SSEFloat32Neg) \
V(SSEFloat32Sqrt) \
V(SSEFloat32Round) \
V(SSEFloat64Cmp) \
......@@ -58,8 +56,6 @@ namespace compiler {
V(SSEFloat64Max) \
V(SSEFloat32Min) \
V(SSEFloat64Min) \
V(SSEFloat64Abs) \
V(SSEFloat64Neg) \
V(SSEFloat64Sqrt) \
V(SSEFloat64Round) \
V(SSEFloat32ToFloat64) \
......@@ -86,10 +82,10 @@ namespace compiler {
V(Float32Div) \
V(Float64Mul) \
V(Float64Div) \
V(AVXFloat64Abs) \
V(AVXFloat64Neg) \
V(AVXFloat32Abs) \
V(AVXFloat32Neg) \
V(Float64Abs) \
V(Float64Neg) \
V(Float32Abs) \
V(Float32Neg) \
V(IA32Movsxbl) \
V(IA32Movzxbl) \
V(IA32Movb) \
......
......@@ -49,8 +49,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Bswap:
case kIA32Lea:
case kSSEFloat32Cmp:
case kSSEFloat32Abs:
case kSSEFloat32Neg:
case kSSEFloat32Sqrt:
case kSSEFloat32Round:
case kSSEFloat64Cmp:
......@@ -59,8 +57,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEFloat64Max:
case kSSEFloat32Min:
case kSSEFloat64Min:
case kSSEFloat64Abs:
case kSSEFloat64Neg:
case kSSEFloat64Sqrt:
case kSSEFloat64Round:
case kSSEFloat32ToFloat64:
......@@ -87,10 +83,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kFloat32Div:
case kFloat64Mul:
case kFloat64Div:
case kAVXFloat64Abs:
case kAVXFloat64Neg:
case kAVXFloat32Abs:
case kAVXFloat32Neg:
case kFloat64Abs:
case kFloat64Neg:
case kFloat32Abs:
case kFloat32Neg:
case kIA32BitcastFI:
case kIA32BitcastIF:
case kIA32F64x2Splat:
......@@ -452,12 +448,12 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kFloat32Sub:
case kFloat64Add:
case kFloat64Sub:
case kSSEFloat32Abs:
case kSSEFloat32Neg:
case kFloat32Abs:
case kFloat32Neg:
case kSSEFloat64Max:
case kSSEFloat64Min:
case kSSEFloat64Abs:
case kSSEFloat64Neg:
case kFloat64Abs:
case kFloat64Neg:
return 5;
case kFloat32Mul:
return 4;
......
......@@ -327,15 +327,12 @@ void VisitRROFloat(InstructionSelector* selector, Node* node,
}
void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
ArchOpcode opcode) {
IA32OperandGenerator g(selector);
InstructionOperand temps[] = {g.TempSimd128Register()};
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input),
arraysize(temps), temps);
selector->Emit(opcode, g.DefineAsRegister(node), g.Use(input));
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node),
g.UseUniqueRegister(input), arraysize(temps), temps);
selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(input));
}
}
......@@ -1194,12 +1191,12 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(F64x2Le, kIA32F64x2Le)
#define FLOAT_UNOP_LIST(V) \
V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
V(Float64Abs, kAVXFloat64Abs, kSSEFloat64Abs) \
V(Float32Neg, kAVXFloat32Neg, kSSEFloat32Neg) \
V(Float64Neg, kAVXFloat64Neg, kSSEFloat64Neg) \
V(F64x2Abs, kAVXFloat64Abs, kSSEFloat64Abs) \
V(F64x2Neg, kAVXFloat64Neg, kSSEFloat64Neg)
V(Float32Abs, kFloat32Abs) \
V(Float64Abs, kFloat64Abs) \
V(Float32Neg, kFloat32Neg) \
V(Float64Neg, kFloat64Neg) \
V(F64x2Abs, kFloat64Abs) \
V(F64x2Neg, kFloat64Neg)
#define RO_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \
......@@ -1241,9 +1238,9 @@ RRO_FLOAT_OP_LIST(RRO_FLOAT_VISITOR)
#undef RRO_FLOAT_VISITOR
#undef RRO_FLOAT_OP_LIST
#define FLOAT_UNOP_VISITOR(Name, avx, sse) \
#define FLOAT_UNOP_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \
VisitFloatUnop(this, node, node->InputAt(0), avx, sse); \
VisitFloatUnop(this, node, node->InputAt(0), opcode); \
}
FLOAT_UNOP_LIST(FLOAT_UNOP_VISITOR)
#undef FLOAT_UNOP_VISITOR
......
......@@ -743,7 +743,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) {
m.Return(n);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat32Abs, s[0]->arch_opcode());
EXPECT_EQ(kFloat32Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
......@@ -758,7 +758,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) {
m.Return(n);
Stream s = m.Build(AVX);
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kAVXFloat32Abs, s[0]->arch_opcode());
EXPECT_EQ(kFloat32Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
......@@ -776,7 +776,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
m.Return(n);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat64Abs, s[0]->arch_opcode());
EXPECT_EQ(kFloat64Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
......@@ -791,7 +791,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
m.Return(n);
Stream s = m.Build(AVX);
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kAVXFloat64Abs, s[0]->arch_opcode());
EXPECT_EQ(kFloat64Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment