Commit 9996d8ae authored by Ng Zhi An, committed by V8 LUCI CQ

[x64] Consolidate SSE/AVX Float32/Float64 Abs/Neg

1. Move Abspd, Negpd from MacroAssembler into TurboAssembler so that we
can use them in the code generator
2. Add Absps and Negps (float32 versions of the instructions in 1)
3. Refactor SSE/AVX float32/float64 abs/neg to use these macro-assembler
helpers.
4. Use these helpers in Liftoff too

This has the benefit of not requiring the masks to be set up in a
temporary register; the constants are loaded via an ExternalReference
instead. It does require (in instruction selection) that the input be in
a Register, since the ExternalReference is an Operand, and the
instruction can take only one Operand input.
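
For context: IEEE-754 abs and neg are pure sign-bit operations, which is
why a single Andps/Xorps against a constant mask suffices. A minimal
scalar sketch of the float32 case (illustrative only, not code from this
change; only the mask values and the constant names come from V8):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Absps ANDs each lane with 0x7FFFFFFF, clearing the sign bit
  // (the value behind address_of_float_abs_constant).
  float abs_via_mask(float x) {
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof bits);
    bits &= 0x7FFFFFFFu;
    std::memcpy(&x, &bits, sizeof x);
    return x;
  }

  // Negps XORs each lane with 0x80000000, flipping the sign bit
  // (the value behind address_of_float_neg_constant).
  float neg_via_mask(float x) {
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof bits);
    bits ^= 0x80000000u;
    std::memcpy(&x, &bits, sizeof x);
    return x;
  }

  int main() {
    std::printf("%f %f\n", abs_via_mask(-1.5f), neg_via_mask(2.0f));
    // prints: 1.500000 -2.000000
  }

The float64 variants are the same trick with 64-bit masks
(0x7FFFFFFFFFFFFFFF and 0x8000000000000000).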

Bug: v8:11589
Change-Id: I68fafaf31b19ab05ee391aa3d54c45d547a85b34
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3123635
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76520}
parent 5f84d2ce
......@@ -1562,16 +1562,6 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t high, uint64_t low) {
// ----------------------------------------------------------------------------
void MacroAssembler::Absps(XMMRegister dst) {
Andps(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_float_abs_constant()));
}
void MacroAssembler::Negps(XMMRegister dst) {
Xorps(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_float_neg_constant()));
}
void MacroAssembler::Cmp(Register dst, Handle<Object> source) {
if (source->IsSmi()) {
Cmp(dst, Smi::cast(*source));
......@@ -2202,14 +2192,44 @@ void TurboAssembler::Blendvpd(XMMRegister dst, XMMRegister src1,
}
}
void TurboAssembler::Abspd(XMMRegister dst) {
Andps(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_double_abs_constant()));
void TurboAssembler::Absps(XMMRegister dst, XMMRegister src) {
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
movaps(dst, src);
src = dst;
}
Andps(dst, src,
ExternalReferenceAsOperand(
ExternalReference::address_of_float_abs_constant()));
}
void TurboAssembler::Negps(XMMRegister dst, XMMRegister src) {
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
movaps(dst, src);
src = dst;
}
Xorps(dst, src,
ExternalReferenceAsOperand(
ExternalReference::address_of_float_neg_constant()));
}
void TurboAssembler::Abspd(XMMRegister dst, XMMRegister src) {
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
movaps(dst, src);
src = dst;
}
Andps(dst, src,
ExternalReferenceAsOperand(
ExternalReference::address_of_double_abs_constant()));
}
void TurboAssembler::Negpd(XMMRegister dst) {
Xorps(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_double_neg_constant()));
void TurboAssembler::Negpd(XMMRegister dst, XMMRegister src) {
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
movaps(dst, src);
src = dst;
}
Xorps(dst, src,
ExternalReferenceAsOperand(
ExternalReference::address_of_double_neg_constant()));
}
void TurboAssembler::Lzcntl(Register dst, Register src) {
......
......@@ -470,8 +470,10 @@ class V8_EXPORT_PRIVATE TurboAssembler
void Blendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask);
void Abspd(XMMRegister dst);
void Negpd(XMMRegister dst);
void Absps(XMMRegister dst, XMMRegister src);
void Negps(XMMRegister dst, XMMRegister src);
void Abspd(XMMRegister dst, XMMRegister src);
void Negpd(XMMRegister dst, XMMRegister src);
void CompareRoot(Register with, RootIndex index);
void CompareRoot(Operand with, RootIndex index);
......@@ -820,10 +822,6 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
void Pop(Operand dst);
void PopQuad(Operand dst);
// ---------------------------------------------------------------------------
// SIMD macros.
void Absps(XMMRegister dst);
void Negps(XMMRegister dst);
// Generates a trampoline to jump to the off-heap instruction stream.
void JumpToInstructionStream(Address entry);
......
......@@ -1597,22 +1597,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// when there is a (v)mulss depending on the result.
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kSSEFloat32Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
__ Pcmpeqd(tmp, tmp);
__ Psrlq(tmp, byte{33});
__ Andps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat32Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
__ Pcmpeqd(tmp, tmp);
__ Psllq(tmp, byte{31});
__ Xorps(i.OutputDoubleRegister(), tmp);
break;
}
case kSSEFloat32Sqrt:
ASSEMBLE_SSE_UNOP(sqrtss);
break;
......@@ -1809,16 +1793,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ bind(ool->exit());
break;
}
case kX64F64x2Abs:
case kSSEFloat64Abs: {
__ Abspd(i.OutputDoubleRegister());
break;
}
case kX64F64x2Neg:
case kSSEFloat64Neg: {
__ Negpd(i.OutputDoubleRegister());
break;
}
case kSSEFloat64Sqrt:
ASSEMBLE_SSE_UNOP(Sqrtsd);
break;
......@@ -2071,56 +2045,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// when there is a (v)mulsd depending on the result.
__ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kAVXFloat32Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
__ vpcmpeqd(tmp, tmp, tmp);
__ vpsrlq(tmp, tmp, 33);
if (instr->InputAt(0)->IsFPRegister()) {
__ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
} else {
__ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
}
case kX64Float32Abs: {
__ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
}
case kAVXFloat32Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
__ vpcmpeqd(tmp, tmp, tmp);
__ vpsllq(tmp, tmp, 31);
if (instr->InputAt(0)->IsFPRegister()) {
__ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
} else {
__ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
}
case kX64Float32Neg: {
__ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
}
case kAVXFloat64Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
__ vpcmpeqd(tmp, tmp, tmp);
__ vpsrlq(tmp, tmp, 1);
if (instr->InputAt(0)->IsFPRegister()) {
__ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
} else {
__ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
}
case kX64F64x2Abs:
case kX64Float64Abs: {
__ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
}
case kAVXFloat64Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
__ vpcmpeqd(tmp, tmp, tmp);
__ vpsllq(tmp, tmp, 63);
if (instr->InputAt(0)->IsFPRegister()) {
__ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
} else {
__ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
}
case kX64F64x2Neg:
case kX64Float64Neg: {
__ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
}
case kSSEFloat64SilenceNaN:
......
......@@ -62,8 +62,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEFloat32Sub:
case kSSEFloat32Mul:
case kSSEFloat32Div:
case kSSEFloat32Abs:
case kSSEFloat32Neg:
case kSSEFloat32Sqrt:
case kSSEFloat32Round:
case kSSEFloat32ToFloat64:
......@@ -73,8 +71,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEFloat64Mul:
case kSSEFloat64Div:
case kSSEFloat64Mod:
case kSSEFloat64Abs:
case kSSEFloat64Neg:
case kSSEFloat64Sqrt:
case kSSEFloat64Round:
case kSSEFloat32Max:
......@@ -114,10 +110,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXFloat64Sub:
case kAVXFloat64Mul:
case kAVXFloat64Div:
case kAVXFloat64Abs:
case kAVXFloat64Neg:
case kAVXFloat32Abs:
case kAVXFloat32Neg:
case kX64Float64Abs:
case kX64Float64Neg:
case kX64Float32Abs:
case kX64Float32Neg:
case kX64BitcastFI:
case kX64BitcastDL:
case kX64BitcastIF:
......@@ -451,18 +447,18 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kX64Imul32:
case kX64ImulHigh32:
case kX64UmulHigh32:
case kX64Float32Abs:
case kX64Float32Neg:
case kX64Float64Abs:
case kX64Float64Neg:
case kSSEFloat32Cmp:
case kSSEFloat32Add:
case kSSEFloat32Sub:
case kSSEFloat32Abs:
case kSSEFloat32Neg:
case kSSEFloat64Cmp:
case kSSEFloat64Add:
case kSSEFloat64Sub:
case kSSEFloat64Max:
case kSSEFloat64Min:
case kSSEFloat64Abs:
case kSSEFloat64Neg:
return 3;
case kSSEFloat32Mul:
case kSSEFloat32ToFloat64:
......
......@@ -1679,15 +1679,12 @@ void VisitFloatBinop(InstructionSelector* selector, Node* node,
}
void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
ArchOpcode opcode) {
X64OperandGenerator g(selector);
InstructionOperand temps[] = {g.TempDoubleRegister()};
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input),
arraysize(temps), temps);
selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(input));
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input),
arraysize(temps), temps);
selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(input));
}
}
......@@ -1827,7 +1824,7 @@ void InstructionSelector::VisitFloat32Div(Node* node) {
}
void InstructionSelector::VisitFloat32Abs(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat32Abs, kSSEFloat32Abs);
VisitFloatUnop(this, node, node->InputAt(0), kX64Float32Abs);
}
void InstructionSelector::VisitFloat32Max(Node* node) {
......@@ -1871,7 +1868,7 @@ void InstructionSelector::VisitFloat64Min(Node* node) {
}
void InstructionSelector::VisitFloat64Abs(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat64Abs, kSSEFloat64Abs);
VisitFloatUnop(this, node, node->InputAt(0), kX64Float64Abs);
}
void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
......@@ -1879,11 +1876,11 @@ void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
}
void InstructionSelector::VisitFloat32Neg(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat32Neg, kSSEFloat32Neg);
VisitFloatUnop(this, node, node->InputAt(0), kX64Float32Neg);
}
void InstructionSelector::VisitFloat64Neg(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat64Neg, kSSEFloat64Neg);
VisitFloatUnop(this, node, node->InputAt(0), kX64Float64Neg);
}
void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
......@@ -3285,15 +3282,11 @@ void InstructionSelector::VisitS128AndNot(Node* node) {
}
void InstructionSelector::VisitF64x2Abs(Node* node) {
X64OperandGenerator g(this);
Emit(kX64F64x2Abs, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)));
VisitFloatUnop(this, node, node->InputAt(0), kX64F64x2Abs);
}
void InstructionSelector::VisitF64x2Neg(Node* node) {
X64OperandGenerator g(this);
Emit(kX64F64x2Neg, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)));
VisitFloatUnop(this, node, node->InputAt(0), kX64F64x2Neg);
}
void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
......
......@@ -3484,28 +3484,12 @@ void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
Psrld(kScratchDoubleReg, static_cast<byte>(1));
Andps(dst.fp(), kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Psrld(dst.fp(), static_cast<byte>(1));
Andps(dst.fp(), src.fp());
}
Absps(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
Pslld(kScratchDoubleReg, byte{31});
Xorps(dst.fp(), kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Pslld(dst.fp(), byte{31});
Xorps(dst.fp(), src.fp());
}
Negps(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
......@@ -3640,28 +3624,12 @@ void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
Psrlq(kScratchDoubleReg, byte{1});
Andpd(dst.fp(), kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Psrlq(dst.fp(), byte{1});
Andpd(dst.fp(), src.fp());
}
Abspd(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
Psllq(kScratchDoubleReg, static_cast<byte>(63));
Xorpd(dst.fp(), kScratchDoubleReg);
} else {
Pcmpeqd(dst.fp(), dst.fp());
Psllq(dst.fp(), static_cast<byte>(63));
Xorpd(dst.fp(), src.fp());
}
Negpd(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
......
......@@ -893,7 +893,7 @@ void TestFloat32x4Abs(MacroAssembler* masm, Label* exit, float x, float y,
__ Movss(Operand(rsp, 3 * kFloatSize), xmm4);
__ Movups(xmm0, Operand(rsp, 0));
__ Absps(xmm0);
__ Absps(xmm0, xmm0);
__ Movups(Operand(rsp, 0), xmm0);
__ incq(rax);
......@@ -930,7 +930,7 @@ void TestFloat32x4Neg(MacroAssembler* masm, Label* exit, float x, float y,
__ Movss(Operand(rsp, 3 * kFloatSize), xmm4);
__ Movups(xmm0, Operand(rsp, 0));
__ Negps(xmm0);
__ Negps(xmm0, xmm0);
__ Movups(Operand(rsp, 0), xmm0);
__ incq(rax);
......@@ -962,7 +962,7 @@ void TestFloat64x2Abs(MacroAssembler* masm, Label* exit, double x, double y) {
__ Movsd(Operand(rsp, 1 * kDoubleSize), xmm2);
__ movupd(xmm0, Operand(rsp, 0));
__ Abspd(xmm0);
__ Abspd(xmm0, xmm0);
__ movupd(Operand(rsp, 0), xmm0);
__ incq(rax);
......@@ -986,7 +986,7 @@ void TestFloat64x2Neg(MacroAssembler* masm, Label* exit, double x, double y) {
__ Movsd(Operand(rsp, 1 * kDoubleSize), xmm2);
__ movupd(xmm0, Operand(rsp, 0));
__ Negpd(xmm0);
__ Negpd(xmm0, xmm0);
__ movupd(Operand(rsp, 0), xmm0);
__ incq(rax);
......
......@@ -1579,7 +1579,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) {
m.Return(n);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat32Abs, s[0]->arch_opcode());
EXPECT_EQ(kX64Float32Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
......@@ -1594,7 +1594,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) {
m.Return(n);
Stream s = m.Build(AVX);
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kAVXFloat32Abs, s[0]->arch_opcode());
EXPECT_EQ(kX64Float32Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
......@@ -1612,7 +1612,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
m.Return(n);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat64Abs, s[0]->arch_opcode());
EXPECT_EQ(kX64Float64Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
......@@ -1627,7 +1627,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
m.Return(n);
Stream s = m.Build(AVX);
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kAVXFloat64Abs, s[0]->arch_opcode());
EXPECT_EQ(kX64Float64Abs, s[0]->arch_opcode());
ASSERT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
ASSERT_EQ(1U, s[0]->OutputCount());
......