Commit c929b7a9 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32] Small optimization to v128.and_not

If AVX is supported, we don't need dst == src, which can save a move.

Move the Andnps macro from the two-operand AVX_OP3_XO list to the three-operand
AVX_PACKED_OP3 list (sketched below), and update the call sites in Liftoff.

Bug: v8:11190
Change-Id: I7307b70943fdd91550d608cb91e9890f23d1b3ad
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2750726
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73386}
parent 104283a2
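For context, the win comes from the SSE encoding of andnps being destructive (the first
operand is both a source and the destination), while the AVX encoding vandnps takes a
separate destination. A minimal sketch of what the three-operand Andnps helper is assumed
to do after this change (the exact body generated by AVX_PACKED_OP3 may differ):

    // Sketch only; assumes the usual CpuFeatures/CpuFeatureScope helpers.
    void TurboAssembler::Andnps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope scope(this, AVX);
        vandnps(dst, src1, src2);  // AVX: dst = ~src1 & src2, dst may differ from src1.
      } else {
        DCHECK_EQ(dst, src1);      // SSE andnps is destructive: dst must alias src1.
        andnps(dst, src2);         // SSE: dst = ~dst & src2.
      }
    }

With AVX the instruction selector can therefore use DefineAsRegister instead of
DefineSameAsFirst, so the register allocator no longer has to copy the first input into
the output register before the operation.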
@@ -420,7 +420,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP3_XO(Punpcklqdq, punpcklqdq)
   AVX_OP3_XO(Pxor, pxor)
   AVX_OP3_XO(Andps, andps)
-  AVX_OP3_XO(Andnps, andnps)
   AVX_OP3_XO(Andpd, andpd)
   AVX_OP3_XO(Xorps, xorps)
   AVX_OP3_XO(Xorpd, xorpd)
@@ -476,6 +475,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)

   AVX_PACKED_OP3(Unpcklps, unpcklps)
+  AVX_PACKED_OP3(Andnps, andnps)
   AVX_PACKED_OP3(Addps, addps)
   AVX_PACKED_OP3(Addpd, addpd)
   AVX_PACKED_OP3(Subps, subps)

...@@ -3617,12 +3617,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3617,12 +3617,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kIA32S128AndNot: { case kIA32S128AndNot: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The inputs have been inverted by instruction selector, so we can call // The inputs have been inverted by instruction selector, so we can call
// andnps here without any modifications. // andnps here without any modifications.
XMMRegister src1 = i.InputSimd128Register(1); __ Andnps(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ Andnps(dst, src1); i.InputSimd128Register(1));
break; break;
} }
case kIA32I8x16Swizzle: { case kIA32I8x16Swizzle: {
......
...@@ -2515,8 +2515,10 @@ void InstructionSelector::VisitS128Select(Node* node) { ...@@ -2515,8 +2515,10 @@ void InstructionSelector::VisitS128Select(Node* node) {
void InstructionSelector::VisitS128AndNot(Node* node) { void InstructionSelector::VisitS128AndNot(Node* node) {
IA32OperandGenerator g(this); IA32OperandGenerator g(this);
// andnps a b does ~a & b, but we want a & !b, so flip the input. // andnps a b does ~a & b, but we want a & !b, so flip the input.
Emit(kIA32S128AndNot, g.DefineSameAsFirst(node), InstructionOperand dst =
g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(0))); IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
Emit(kIA32S128AndNot, dst, g.UseRegister(node->InputAt(1)),
g.UseRegister(node->InputAt(0)));
} }
#define VISIT_SIMD_SPLAT(Type) \ #define VISIT_SIMD_SPLAT(Type) \
......
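The flipped operands follow from the instruction semantics: x86 andnps computes
~first & second, while wasm's v128.andnot(a, b) is a & ~b, so passing (b, a) yields the
desired result. A standalone scalar illustration (hypothetical helper name, not V8 code):

    #include <cassert>
    #include <cstdint>

    // Mirrors the per-bit behaviour of andnps on a single 32-bit value.
    uint32_t andnps_like(uint32_t first, uint32_t second) { return ~first & second; }

    int main() {
      uint32_t a = 0b1100u, b = 0b1010u;
      // wasm wants a & ~b; handing the inputs to the andnot op in reverse order gives it.
      assert((a & ~b) == andnps_like(b, a));  // both equal 0b0100
      return 0;
    }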
...@@ -4134,7 +4134,7 @@ void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs, ...@@ -4134,7 +4134,7 @@ void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
Cmpunordps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg); Cmpunordps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
Orps(liftoff::kScratchDoubleReg, dst.fp()); Orps(liftoff::kScratchDoubleReg, dst.fp());
Psrld(dst.fp(), dst.fp(), byte{10}); Psrld(dst.fp(), dst.fp(), byte{10});
Andnps(dst.fp(), liftoff::kScratchDoubleReg); Andnps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
} }
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
...@@ -4165,7 +4165,7 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs, ...@@ -4165,7 +4165,7 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic. // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
Cmpunordps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg); Cmpunordps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
Psrld(dst.fp(), dst.fp(), byte{10}); Psrld(dst.fp(), dst.fp(), byte{10});
Andnps(dst.fp(), liftoff::kScratchDoubleReg); Andnps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
} }
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
......
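The unchanged context above also shows the NaN canonicalization idiom that now goes
through the three-operand Andnps: cmpunordps writes an all-ones mask into NaN lanes,
psrld shifts that mask right by 10 so only the low 22 bits stay set, and andnps
(~mask & value) clears the NaN payload while leaving non-NaN lanes untouched. A
standalone per-lane sketch (hypothetical helper, not V8 code):

    #include <cassert>
    #include <cstdint>

    // One 32-bit lane of the Cmpunordps / Psrld / Andnps sequence.
    uint32_t canonicalize_lane(uint32_t value_bits, bool lane_is_nan) {
      uint32_t mask = lane_is_nan ? 0xFFFFFFFFu : 0u;  // cmpunordps: all-ones for NaN lanes
      mask >>= 10;                                     // psrld by 10: keep the low 22 bits
      return ~mask & value_bits;                       // andnps: clear the payload bits
    }

    int main() {
      // A quiet NaN with an arbitrary payload collapses to the canonical pattern.
      assert(canonicalize_lane(0x7FC12345u, true) == 0x7FC00000u);
      // Non-NaN lanes pass through unchanged (1.0f shown here).
      assert(canonicalize_lane(0x3F800000u, false) == 0x3F800000u);
      return 0;
    }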