Commit 89d22866 authored by Ng Zhi An, committed by V8 LUCI CQ

[ia32][x64] Share Absps/Abspd/Negps/Negpd implementation

Move this from macro-assembler-x64 to shared-macro-assembler, and use
this implementation for ia32 (TurboFan and Liftoff).

Bug: v8:11589
Change-Id: If851560c8db1293924ca024725609c399c553a4a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3124099
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76803}
parent d23dfb90
......@@ -7,6 +7,7 @@
#include "src/base/macros.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/external-reference.h"
#include "src/codegen/turbo-assembler.h"
#if V8_TARGET_ARCH_IA32
......@@ -461,6 +462,27 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
using SharedTurboAssembler::SharedTurboAssembler;
public:
// Packed f64 absolute value: dst = |src| per lane, by anding with the
// double-abs constant (sign bits cleared). tmp is a GP register used to
// address the constant.
void Abspd(XMMRegister dst, XMMRegister src, Register tmp) {
  // NOTE(review): Andps (not Andpd) on double data — bitwise identical,
  // presumably chosen for the shorter encoding; confirm intentional.
  FloatUnop(dst, src, tmp, &SharedTurboAssembler::Andps,
            ExternalReference::address_of_double_abs_constant());
}
// Packed f32 absolute value: dst = |src| per lane, by anding with the
// float-abs constant (sign bits cleared). tmp addresses the constant.
void Absps(XMMRegister dst, XMMRegister src, Register tmp) {
  FloatUnop(dst, src, tmp, &SharedTurboAssembler::Andps,
            ExternalReference::address_of_float_abs_constant());
}
// Packed f64 negation: dst = -src per lane, by xoring with the double-neg
// constant (sign bits flipped). tmp addresses the constant.
void Negpd(XMMRegister dst, XMMRegister src, Register tmp) {
  // NOTE(review): Xorps (not Xorpd) on double data — bitwise identical,
  // presumably chosen for the shorter encoding; confirm intentional.
  FloatUnop(dst, src, tmp, &SharedTurboAssembler::Xorps,
            ExternalReference::address_of_double_neg_constant());
}
// Packed f32 negation: dst = -src per lane, by xoring with the float-neg
// constant (sign bits flipped). tmp addresses the constant.
void Negps(XMMRegister dst, XMMRegister src, Register tmp) {
  FloatUnop(dst, src, tmp, &SharedTurboAssembler::Xorps,
            ExternalReference::address_of_float_neg_constant());
}
// NOTE(review): no matching #define FLOAT_UNOP is visible in this view; if
// the macro was dropped in favor of the FloatUnop() helper, this #undef is
// dead and can be deleted — confirm against the full header.
#undef FLOAT_UNOP
void F64x2ConvertLowI32x4U(XMMRegister dst, XMMRegister src,
Register scratch) {
ASM_CODE_COMMENT(this);
......@@ -761,6 +783,18 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
Register scratch) {
return impl()->ExternalReferenceAsOperand(reference, scratch);
}
// Pointer-to-member type for a bitwise packed-float instruction such as
// Andps or Xorps (dst, src, memory operand).
using FloatInstruction = void (SharedTurboAssembler::*)(XMMRegister,
                                                        XMMRegister, Operand);

// Emits dst = op(src, [ext]), where ext points to a 128-bit constant mask
// and tmp is a GP register available for materializing its address.
void FloatUnop(XMMRegister dst, XMMRegister src, Register tmp,
               FloatInstruction op, ExternalReference ext) {
  // The SSE (non-AVX) encodings are destructive: dst doubles as the first
  // source. Copy src into dst up front when they differ.
  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
    movaps(dst, src);
    src = dst;
  }
  SharedTurboAssembler* base = this;
  (base->*op)(dst, src, ExternalReferenceAsOperand(ext, tmp));
}
};
} // namespace internal
......
......@@ -2233,46 +2233,6 @@ void TurboAssembler::Pinsrq(XMMRegister dst, XMMRegister src1, Operand src2,
imm8, load_pc_offset, {SSE4_1});
}
// Packed f32 absolute value: dst = |src| per lane.
void TurboAssembler::Absps(XMMRegister dst, XMMRegister src) {
  // SSE andps is destructive (dst is also the first source), so copy src
  // into dst first when they differ; AVX has a non-destructive 3-op form.
  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
    movaps(dst, src);
    src = dst;
  }
  // And with the float-abs mask to clear the sign bits.
  Andps(dst, src,
        ExternalReferenceAsOperand(
            ExternalReference::address_of_float_abs_constant()));
}
// Packed f32 negation: dst = -src per lane.
void TurboAssembler::Negps(XMMRegister dst, XMMRegister src) {
  // SSE xorps is destructive, so alias src to dst first when needed.
  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
    movaps(dst, src);
    src = dst;
  }
  // Xor with the float-neg mask to flip the sign bits.
  Xorps(dst, src,
        ExternalReferenceAsOperand(
            ExternalReference::address_of_float_neg_constant()));
}
// Packed f64 absolute value: dst = |src| per lane.
void TurboAssembler::Abspd(XMMRegister dst, XMMRegister src) {
  // SSE andps is destructive, so alias src to dst first when needed.
  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
    movaps(dst, src);
    src = dst;
  }
  // NOTE(review): Andps (not Andpd) on double data — bitwise identical,
  // presumably chosen for the shorter encoding; confirm intentional.
  Andps(dst, src,
        ExternalReferenceAsOperand(
            ExternalReference::address_of_double_abs_constant()));
}
// Packed f64 negation: dst = -src per lane.
void TurboAssembler::Negpd(XMMRegister dst, XMMRegister src) {
  // SSE xorps is destructive, so alias src to dst first when needed.
  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
    movaps(dst, src);
    src = dst;
  }
  // NOTE(review): Xorps (not Xorpd) on double data — bitwise identical,
  // presumably chosen for the shorter encoding; confirm intentional.
  Xorps(dst, src,
        ExternalReferenceAsOperand(
            ExternalReference::address_of_double_neg_constant()));
}
void TurboAssembler::Lzcntl(Register dst, Register src) {
if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(this, LZCNT);
......
......@@ -436,11 +436,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
void Pinsrq(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8,
uint32_t* load_pc_offset = nullptr);
void Absps(XMMRegister dst, XMMRegister src);
void Negps(XMMRegister dst, XMMRegister src);
void Abspd(XMMRegister dst, XMMRegister src);
void Negpd(XMMRegister dst, XMMRegister src);
void CompareRoot(Register with, RootIndex index);
void CompareRoot(Operand with, RootIndex index);
......
......@@ -1525,59 +1525,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kFloat32Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psrlq(kScratchDoubleReg, byte{33});
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
__ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.TempRegister(0));
break;
}
case kFloat32Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psllq(kScratchDoubleReg, byte{31});
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
__ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.TempRegister(0));
break;
}
case kFloat64Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psrlq(kScratchDoubleReg, byte{1});
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
__ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.TempRegister(0));
break;
}
case kFloat64Neg: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psllq(kScratchDoubleReg, byte{63});
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
} else {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
}
__ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.TempRegister(0));
break;
}
case kSSEFloat64SilenceNaN:
......@@ -2161,34 +2125,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Addps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
case kIA32F32x4Abs: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psrld(kScratchDoubleReg, kScratchDoubleReg, byte{1});
__ Andps(dst, kScratchDoubleReg);
} else {
__ Pcmpeqd(dst, dst);
__ Psrld(dst, dst, byte{1});
__ Andps(dst, src);
}
break;
}
case kIA32F32x4Neg: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (dst == src) {
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Pslld(kScratchDoubleReg, kScratchDoubleReg, byte{31});
__ Xorps(dst, kScratchDoubleReg);
} else {
__ Pcmpeqd(dst, dst);
__ Pslld(dst, dst, byte{31});
__ Xorps(dst, src);
}
break;
}
case kIA32F32x4Sqrt: {
__ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
......
......@@ -152,8 +152,6 @@ namespace compiler {
V(IA32Insertps) \
V(IA32F32x4SConvertI32x4) \
V(IA32F32x4UConvertI32x4) \
V(IA32F32x4Abs) \
V(IA32F32x4Neg) \
V(IA32F32x4Sqrt) \
V(IA32F32x4RecipApprox) \
V(IA32F32x4RecipSqrtApprox) \
......
......@@ -137,8 +137,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Insertps:
case kIA32F32x4SConvertI32x4:
case kIA32F32x4UConvertI32x4:
case kIA32F32x4Abs:
case kIA32F32x4Neg:
case kIA32F32x4Sqrt:
case kIA32F32x4RecipApprox:
case kIA32F32x4RecipSqrtApprox:
......
......@@ -329,10 +329,13 @@ void VisitRROFloat(InstructionSelector* selector, Node* node,
// Selects a float unop instruction. Both paths reserve one GP temp (used by
// the codegen helpers to address the constant mask); they differ only in
// output constraints.
void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
                    ArchOpcode opcode) {
  IA32OperandGenerator g(selector);
  InstructionOperand temps[] = {g.TempRegister()};
  // With AVX the result may live in any register; without AVX the SSE
  // instruction is destructive, so the output must alias the input operand.
  InstructionOperand output = selector->IsSupported(AVX)
                                  ? g.DefineAsRegister(node)
                                  : g.DefineSameAsFirst(node);
  selector->Emit(opcode, output, g.UseRegister(input), arraysize(temps),
                 temps);
}
......@@ -1195,6 +1198,8 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float64Abs, kFloat64Abs) \
V(Float32Neg, kFloat32Neg) \
V(Float64Neg, kFloat64Neg) \
V(F32x4Abs, kFloat32Abs) \
V(F32x4Neg, kFloat32Neg) \
V(F64x2Abs, kFloat64Abs) \
V(F64x2Neg, kFloat64Neg)
......@@ -2341,8 +2346,6 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(F64x2ConvertLowI32x4S) \
V(F64x2PromoteLowF32x4) \
V(F32x4DemoteF64x2Zero) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4Sqrt) \
V(F32x4SConvertI32x4) \
V(F32x4RecipApprox) \
......
......@@ -2194,21 +2194,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kX64Float32Abs: {
__ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
kScratchRegister);
break;
}
case kX64Float32Neg: {
__ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
kScratchRegister);
break;
}
case kX64F64x2Abs:
case kX64Float64Abs: {
__ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
kScratchRegister);
break;
}
case kX64F64x2Neg:
case kX64Float64Neg: {
__ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
__ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
kScratchRegister);
break;
}
case kSSEFloat64SilenceNaN:
......
......@@ -3933,28 +3933,14 @@ void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
// Lanewise f32 absolute value. Defect fixed: the block still contained the
// superseded inline mask-building sequence (Pcmpeqd/Psrld/Andps) ahead of
// the equivalent shared-helper call, emitting the operation twice; the
// redundant sequence is removed.
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  // Absps needs a GP temp on ia32 to address the float-abs constant.
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Absps(dst.fp(), src.fp(), tmp);
}
// Lanewise f32 negation. Defect fixed: the block still contained the
// superseded inline mask-building sequence (Pcmpeqd/Pslld/Xorps) ahead of
// the equivalent shared-helper call, emitting the operation twice; the
// redundant sequence is removed.
void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  // Negps needs a GP temp on ia32 to address the float-neg constant.
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Negps(dst.fp(), src.fp(), tmp);
}
void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
......@@ -4089,28 +4075,14 @@ void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
// Lanewise f64 absolute value. Defect fixed: the block still contained the
// superseded inline mask-building sequence (Pcmpeqd/Psrlq/Andpd) ahead of
// the equivalent shared-helper call, emitting the operation twice; the
// redundant sequence is removed.
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  // Abspd needs a GP temp on ia32 to address the double-abs constant.
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Abspd(dst.fp(), src.fp(), tmp);
}
// Lanewise f64 negation. Defect fixed: the block still contained the
// superseded inline mask-building sequence (Pcmpeqd/Psllq/Xorpd) ahead of
// the equivalent shared-helper call, emitting the operation twice; the
// redundant sequence is removed.
void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  // Negpd needs a GP temp on ia32 to address the double-neg constant.
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Negpd(dst.fp(), src.fp(), tmp);
}
void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
......
......@@ -3483,12 +3483,12 @@ void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
// Lanewise f32 absolute value (x64). Defect fixed: a stale call using the
// removed two-argument Absps overload preceded the current three-argument
// call; only the current form is kept.
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  // kScratchRegister serves as the GP temp Absps uses for the constant.
  Absps(dst.fp(), src.fp(), kScratchRegister);
}
// Lanewise f32 negation (x64). Defect fixed: a stale call using the removed
// two-argument Negps overload preceded the current three-argument call;
// only the current form is kept.
void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  // kScratchRegister serves as the GP temp Negps uses for the constant.
  Negps(dst.fp(), src.fp(), kScratchRegister);
}
void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
......@@ -3623,12 +3623,12 @@ void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
// Lanewise f64 absolute value (x64). Defect fixed: a stale call using the
// removed two-argument Abspd overload preceded the current three-argument
// call; only the current form is kept.
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  // kScratchRegister serves as the GP temp Abspd uses for the constant.
  Abspd(dst.fp(), src.fp(), kScratchRegister);
}
// Lanewise f64 negation (x64). Defect fixed: a stale call using the removed
// two-argument Negpd overload preceded the current three-argument call;
// only the current form is kept.
void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  // kScratchRegister serves as the GP temp Negpd uses for the constant.
  Negpd(dst.fp(), src.fp(), kScratchRegister);
}
void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
......
......@@ -893,7 +893,7 @@ void TestFloat32x4Abs(MacroAssembler* masm, Label* exit, float x, float y,
__ Movss(Operand(rsp, 3 * kFloatSize), xmm4);
__ Movups(xmm0, Operand(rsp, 0));
__ Absps(xmm0, xmm0);
__ Absps(xmm0, xmm0, kScratchRegister);
__ Movups(Operand(rsp, 0), xmm0);
__ incq(rax);
......@@ -930,7 +930,7 @@ void TestFloat32x4Neg(MacroAssembler* masm, Label* exit, float x, float y,
__ Movss(Operand(rsp, 3 * kFloatSize), xmm4);
__ Movups(xmm0, Operand(rsp, 0));
__ Negps(xmm0, xmm0);
__ Negps(xmm0, xmm0, kScratchRegister);
__ Movups(Operand(rsp, 0), xmm0);
__ incq(rax);
......@@ -962,7 +962,7 @@ void TestFloat64x2Abs(MacroAssembler* masm, Label* exit, double x, double y) {
__ Movsd(Operand(rsp, 1 * kDoubleSize), xmm2);
__ movupd(xmm0, Operand(rsp, 0));
__ Abspd(xmm0, xmm0);
__ Abspd(xmm0, xmm0, kScratchRegister);
__ movupd(Operand(rsp, 0), xmm0);
__ incq(rax);
......@@ -986,7 +986,7 @@ void TestFloat64x2Neg(MacroAssembler* masm, Label* exit, double x, double y) {
__ Movsd(Operand(rsp, 1 * kDoubleSize), xmm2);
__ movupd(xmm0, Operand(rsp, 0));
__ Negpd(xmm0, xmm0);
__ Negpd(xmm0, xmm0, kScratchRegister);
__ movupd(Operand(rsp, 0), xmm0);
__ incq(rax);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment