Commit c929b7a9 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32] Small optimization to v128.and_not

If AVX is supported, we don't need dst == src, which can save a move.

Move the Andnps macro around, and update callsites in Liftoff.

Bug: v8:11190
Change-Id: I7307b70943fdd91550d608cb91e9890f23d1b3ad
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2750726
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73386}
parent 104283a2
......@@ -420,7 +420,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO(Punpcklqdq, punpcklqdq)
AVX_OP3_XO(Pxor, pxor)
AVX_OP3_XO(Andps, andps)
AVX_OP3_XO(Andnps, andnps)
AVX_OP3_XO(Andpd, andpd)
AVX_OP3_XO(Xorps, xorps)
AVX_OP3_XO(Xorpd, xorpd)
......@@ -476,6 +475,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
AVX_PACKED_OP3(Unpcklps, unpcklps)
AVX_PACKED_OP3(Andnps, andnps)
AVX_PACKED_OP3(Addps, addps)
AVX_PACKED_OP3(Addpd, addpd)
AVX_PACKED_OP3(Subps, subps)
......
......@@ -3617,12 +3617,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32S128AndNot: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The inputs have been inverted by instruction selector, so we can call
// andnps here without any modifications.
XMMRegister src1 = i.InputSimd128Register(1);
__ Andnps(dst, src1);
__ Andnps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kIA32I8x16Swizzle: {
......
......@@ -2515,8 +2515,10 @@ void InstructionSelector::VisitS128Select(Node* node) {
// Emits the kIA32S128AndNot instruction for `node`, passing the node's two
// inputs in swapped order.
// NOTE(review): this listing appears to be a diff rendered without +/- markers,
// so the first Emit call below is presumably the pre-change code and the
// `dst` definition plus second Emit call its replacement -- confirm against
// the real file before treating both calls as live code.
void InstructionSelector::VisitS128AndNot(Node* node) {
IA32OperandGenerator g(this);
// andnps a b does ~a & b, but we want a & ~b, so flip the input.
Emit(kIA32S128AndNot, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(0)));
// Per the commit description, dst == src is not needed when AVX is supported
// (which can save a move); without AVX the output is still defined as
// same-as-first-input.
InstructionOperand dst =
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
Emit(kIA32S128AndNot, dst, g.UseRegister(node->InputAt(1)),
g.UseRegister(node->InputAt(0)));
}
#define VISIT_SIMD_SPLAT(Type) \
......
......@@ -4134,7 +4134,7 @@ void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
Cmpunordps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
Orps(liftoff::kScratchDoubleReg, dst.fp());
Psrld(dst.fp(), dst.fp(), byte{10});
Andnps(dst.fp(), liftoff::kScratchDoubleReg);
Andnps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
}
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -4165,7 +4165,7 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
Cmpunordps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
Psrld(dst.fp(), dst.fp(), byte{10});
Andnps(dst.fp(), liftoff::kScratchDoubleReg);
Andnps(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
}
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment