Commit b0d79120 authored by Zhi An Ng's avatar Zhi An Ng Committed by Commit Bot

[wasm-simd][x64] Prototype sign select

Prototype i8x16, i16x8, i32x4, i64x2 sign select on x64 and interpreter.

Bug: v8:10983
Change-Id: I7d6f39a2cb4c2aefe31daac782978fe8b363dd1a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2486235
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70818}
parent fd12dfb9
...@@ -1066,6 +1066,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1066,6 +1066,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION) SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION) SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15) DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
#undef DECLARE_SSE4_INSTRUCTION #undef DECLARE_SSE4_INSTRUCTION
...@@ -1126,6 +1128,20 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1126,6 +1128,20 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_UNOP_AVX_INSTRUCTION) SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_UNOP_AVX_INSTRUCTION)
#undef DECLARE_SSSE3_UNOP_AVX_INSTRUCTION #undef DECLARE_SSSE3_UNOP_AVX_INSTRUCTION
// Emits AVX VPBLENDVB (opcode 66 0F 3A 4C, W0): byte-wise variable blend.
// Each destination byte is taken from src2 when the corresponding mask
// byte has its sign bit set, else from src1.
void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask) {
vinstr(0x4C, dst, src1, src2, k66, k0F3A, kW0);
// The mask operand is encoded in bits[7:4] of the immediate byte.
emit(mask.code() << 4);
}
// Emits AVX VBLENDVPS (opcode 66 0F 3A 4A, W0): 32-bit-lane variable blend.
// Each dword lane comes from src2 when the mask lane's sign bit is set,
// else from src1.
void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask) {
vinstr(0x4A, dst, src1, src2, k66, k0F3A, kW0);
// The mask operand is encoded in bits[7:4] of the immediate byte.
emit(mask.code() << 4);
}
void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask) { XMMRegister mask) {
vinstr(0x4B, dst, src1, src2, k66, k0F3A, kW0); vinstr(0x4B, dst, src1, src2, k66, k0F3A, kW0);
......
...@@ -1880,6 +1880,42 @@ void TurboAssembler::Pslld(XMMRegister dst, byte imm8) { ...@@ -1880,6 +1880,42 @@ void TurboAssembler::Pslld(XMMRegister dst, byte imm8) {
} }
} }
// Macro-assembler wrapper for byte blend: uses the non-destructive AVX
// form when available, else falls back to SSE4.1 pblendvb, which is
// destructive (dst must alias src1) and implicitly uses xmm0 as the mask.
void TurboAssembler::Pblendvb(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister mask) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpblendvb(dst, src1, src2, mask);
} else {
// SSE4.1 pblendvb hard-codes xmm0 as the mask and overwrites dst.
DCHECK_EQ(dst, src1);
DCHECK_EQ(xmm0, mask);
pblendvb(dst, src2);
}
}
// Macro-assembler wrapper for 32-bit-lane blend: AVX form when available,
// otherwise SSE4.1 blendvps (destructive, mask implicitly in xmm0 —
// enforced by the DCHECKs below).
void TurboAssembler::Blendvps(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister mask) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vblendvps(dst, src1, src2, mask);
} else {
// SSE4.1 blendvps hard-codes xmm0 as the mask and overwrites dst.
DCHECK_EQ(dst, src1);
DCHECK_EQ(xmm0, mask);
blendvps(dst, src2);
}
}
// Macro-assembler wrapper for 64-bit-lane blend: AVX form when available,
// otherwise SSE4.1 blendvpd (destructive, mask implicitly in xmm0 —
// enforced by the DCHECKs below).
void TurboAssembler::Blendvpd(XMMRegister dst, XMMRegister src1,
XMMRegister src2, XMMRegister mask) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vblendvpd(dst, src1, src2, mask);
} else {
// SSE4.1 blendvpd hard-codes xmm0 as the mask and overwrites dst.
DCHECK_EQ(dst, src1);
DCHECK_EQ(xmm0, mask);
blendvpd(dst, src2);
}
}
void TurboAssembler::Psrld(XMMRegister dst, byte imm8) { void TurboAssembler::Psrld(XMMRegister dst, byte imm8) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
......
...@@ -540,6 +540,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -540,6 +540,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Pslld(XMMRegister dst, byte imm8); void Pslld(XMMRegister dst, byte imm8);
void Psrld(XMMRegister dst, byte imm8); void Psrld(XMMRegister dst, byte imm8);
void Pblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask);
void Blendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask);
void Blendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask);
void CompareRoot(Register with, RootIndex index); void CompareRoot(Register with, RootIndex index);
void CompareRoot(Operand with, RootIndex index); void CompareRoot(Operand with, RootIndex index);
......
...@@ -2023,6 +2023,8 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -2023,6 +2023,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI64x2ExtMulLowI32x4U(node); return MarkAsSimd128(node), VisitI64x2ExtMulLowI32x4U(node);
case IrOpcode::kI64x2ExtMulHighI32x4U: case IrOpcode::kI64x2ExtMulHighI32x4U:
return MarkAsSimd128(node), VisitI64x2ExtMulHighI32x4U(node); return MarkAsSimd128(node), VisitI64x2ExtMulHighI32x4U(node);
case IrOpcode::kI64x2SignSelect:
return MarkAsSimd128(node), VisitI64x2SignSelect(node);
case IrOpcode::kI32x4Splat: case IrOpcode::kI32x4Splat:
return MarkAsSimd128(node), VisitI32x4Splat(node); return MarkAsSimd128(node), VisitI32x4Splat(node);
case IrOpcode::kI32x4ExtractLane: case IrOpcode::kI32x4ExtractLane:
...@@ -2091,6 +2093,8 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -2091,6 +2093,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4ExtMulLowI16x8U(node); return MarkAsSimd128(node), VisitI32x4ExtMulLowI16x8U(node);
case IrOpcode::kI32x4ExtMulHighI16x8U: case IrOpcode::kI32x4ExtMulHighI16x8U:
return MarkAsSimd128(node), VisitI32x4ExtMulHighI16x8U(node); return MarkAsSimd128(node), VisitI32x4ExtMulHighI16x8U(node);
case IrOpcode::kI32x4SignSelect:
return MarkAsSimd128(node), VisitI32x4SignSelect(node);
case IrOpcode::kI16x8Splat: case IrOpcode::kI16x8Splat:
return MarkAsSimd128(node), VisitI16x8Splat(node); return MarkAsSimd128(node), VisitI16x8Splat(node);
case IrOpcode::kI16x8ExtractLaneU: case IrOpcode::kI16x8ExtractLaneU:
...@@ -2171,6 +2175,8 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -2171,6 +2175,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI16x8ExtMulLowI8x16U(node); return MarkAsSimd128(node), VisitI16x8ExtMulLowI8x16U(node);
case IrOpcode::kI16x8ExtMulHighI8x16U: case IrOpcode::kI16x8ExtMulHighI8x16U:
return MarkAsSimd128(node), VisitI16x8ExtMulHighI8x16U(node); return MarkAsSimd128(node), VisitI16x8ExtMulHighI8x16U(node);
case IrOpcode::kI16x8SignSelect:
return MarkAsSimd128(node), VisitI16x8SignSelect(node);
case IrOpcode::kI8x16Splat: case IrOpcode::kI8x16Splat:
return MarkAsSimd128(node), VisitI8x16Splat(node); return MarkAsSimd128(node), VisitI8x16Splat(node);
case IrOpcode::kI8x16ExtractLaneU: case IrOpcode::kI8x16ExtractLaneU:
...@@ -2233,6 +2239,8 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -2233,6 +2239,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI8x16Abs(node); return MarkAsSimd128(node), VisitI8x16Abs(node);
case IrOpcode::kI8x16BitMask: case IrOpcode::kI8x16BitMask:
return MarkAsWord32(node), VisitI8x16BitMask(node); return MarkAsWord32(node), VisitI8x16BitMask(node);
case IrOpcode::kI8x16SignSelect:
return MarkAsSimd128(node), VisitI8x16SignSelect(node);
case IrOpcode::kS128Const: case IrOpcode::kS128Const:
return MarkAsSimd128(node), VisitS128Const(node); return MarkAsSimd128(node), VisitS128Const(node);
case IrOpcode::kS128Zero: case IrOpcode::kS128Zero:
...@@ -2771,6 +2779,12 @@ void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); } ...@@ -2771,6 +2779,12 @@ void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:10997) Prototype i64x2.bitmask. // TODO(v8:10997) Prototype i64x2.bitmask.
void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:10983) Prototyping sign select.
// Generic fallbacks for architectures without a sign-select lowering;
// compiled out on x64 by the surrounding #if !V8_TARGET_ARCH_X64 guard.
void InstructionSelector::VisitI8x16SignSelect(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8SignSelect(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4SignSelect(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 #endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
...@@ -3511,6 +3511,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3511,6 +3511,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0)); __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
break; break;
} }
case kX64I8x16SignSelect: {
__ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kX64I16x8SignSelect: {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpsraw(kScratchDoubleReg, i.InputSimd128Register(2), 15);
__ vpblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), kScratchDoubleReg);
} else {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister mask = i.InputSimd128Register(2);
DCHECK_EQ(xmm0, mask);
__ movapd(kScratchDoubleReg, mask);
__ pxor(mask, mask);
__ pcmpgtw(mask, kScratchDoubleReg);
__ pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(1));
// Restore mask.
__ movapd(mask, kScratchDoubleReg);
}
break;
}
case kX64I32x4SignSelect: {
__ Blendvps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kX64I64x2SignSelect: {
__ Blendvpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kX64S128And: { case kX64S128And: {
ASSEMBLE_SIMD_BINOP(pand); ASSEMBLE_SIMD_BINOP(pand);
break; break;
......
...@@ -211,6 +211,7 @@ namespace compiler { ...@@ -211,6 +211,7 @@ namespace compiler {
V(X64I64x2Mul) \ V(X64I64x2Mul) \
V(X64I64x2Eq) \ V(X64I64x2Eq) \
V(X64I64x2ShrU) \ V(X64I64x2ShrU) \
V(X64I64x2SignSelect) \
V(X64I32x4Splat) \ V(X64I32x4Splat) \
V(X64I32x4ExtractLane) \ V(X64I32x4ExtractLane) \
V(X64I32x4SConvertF32x4) \ V(X64I32x4SConvertF32x4) \
...@@ -240,6 +241,7 @@ namespace compiler { ...@@ -240,6 +241,7 @@ namespace compiler {
V(X64I32x4Abs) \ V(X64I32x4Abs) \
V(X64I32x4BitMask) \ V(X64I32x4BitMask) \
V(X64I32x4DotI16x8S) \ V(X64I32x4DotI16x8S) \
V(X64I32x4SignSelect) \
V(X64I16x8Splat) \ V(X64I16x8Splat) \
V(X64I16x8ExtractLaneS) \ V(X64I16x8ExtractLaneS) \
V(X64I16x8SConvertI8x16Low) \ V(X64I16x8SConvertI8x16Low) \
...@@ -273,6 +275,7 @@ namespace compiler { ...@@ -273,6 +275,7 @@ namespace compiler {
V(X64I16x8RoundingAverageU) \ V(X64I16x8RoundingAverageU) \
V(X64I16x8Abs) \ V(X64I16x8Abs) \
V(X64I16x8BitMask) \ V(X64I16x8BitMask) \
V(X64I16x8SignSelect) \
V(X64I8x16Splat) \ V(X64I8x16Splat) \
V(X64I8x16ExtractLaneS) \ V(X64I8x16ExtractLaneS) \
V(X64Pinsrb) \ V(X64Pinsrb) \
...@@ -307,6 +310,7 @@ namespace compiler { ...@@ -307,6 +310,7 @@ namespace compiler {
V(X64I8x16RoundingAverageU) \ V(X64I8x16RoundingAverageU) \
V(X64I8x16Abs) \ V(X64I8x16Abs) \
V(X64I8x16BitMask) \ V(X64I8x16BitMask) \
V(X64I8x16SignSelect) \
V(X64S128Const) \ V(X64S128Const) \
V(X64S128Zero) \ V(X64S128Zero) \
V(X64S128AllOnes) \ V(X64S128AllOnes) \
......
...@@ -187,6 +187,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -187,6 +187,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I64x2Mul: case kX64I64x2Mul:
case kX64I64x2Eq: case kX64I64x2Eq:
case kX64I64x2ShrU: case kX64I64x2ShrU:
case kX64I64x2SignSelect:
case kX64I32x4Splat: case kX64I32x4Splat:
case kX64I32x4ExtractLane: case kX64I32x4ExtractLane:
case kX64I32x4SConvertF32x4: case kX64I32x4SConvertF32x4:
...@@ -216,6 +217,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -216,6 +217,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I32x4Abs: case kX64I32x4Abs:
case kX64I32x4BitMask: case kX64I32x4BitMask:
case kX64I32x4DotI16x8S: case kX64I32x4DotI16x8S:
case kX64I32x4SignSelect:
case kX64I16x8Splat: case kX64I16x8Splat:
case kX64I16x8ExtractLaneS: case kX64I16x8ExtractLaneS:
case kX64I16x8SConvertI8x16Low: case kX64I16x8SConvertI8x16Low:
...@@ -249,6 +251,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -249,6 +251,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I16x8RoundingAverageU: case kX64I16x8RoundingAverageU:
case kX64I16x8Abs: case kX64I16x8Abs:
case kX64I16x8BitMask: case kX64I16x8BitMask:
case kX64I16x8SignSelect:
case kX64I8x16Splat: case kX64I8x16Splat:
case kX64I8x16ExtractLaneS: case kX64I8x16ExtractLaneS:
case kX64I8x16SConvertI16x8: case kX64I8x16SConvertI16x8:
...@@ -277,6 +280,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -277,6 +280,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I8x16RoundingAverageU: case kX64I8x16RoundingAverageU:
case kX64I8x16Abs: case kX64I8x16Abs:
case kX64I8x16BitMask: case kX64I8x16BitMask:
case kX64I8x16SignSelect:
case kX64S128And: case kX64S128And:
case kX64S128Or: case kX64S128Or:
case kX64S128Xor: case kX64S128Xor:
......
...@@ -3139,6 +3139,40 @@ void InstructionSelector::VisitS128Select(Node* node) { ...@@ -3139,6 +3139,40 @@ void InstructionSelector::VisitS128Select(Node* node) {
g.UseRegister(node->InputAt(2))); g.UseRegister(node->InputAt(2)));
} }
namespace {
// Shared x64 lowering for i8x16/i16x8/i32x4/i64x2 signselect: emits the
// arch opcode with the first two inputs swapped, because the blend
// instructions select the *second* operand where the mask sign bit is set,
// while signselect must select the *first*.
void VisitSignSelect(InstructionSelector* selector, Node* node,
ArchOpcode opcode) {
X64OperandGenerator g(selector);
// signselect(x, y, -1) = x
// pblendvb(dst, x, y, -1) = dst <- y, so we need to swap x and y.
if (selector->IsSupported(AVX)) {
// AVX blends are non-destructive: any output register, any mask register.
selector->Emit(
opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(1)),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(2)));
} else {
// SSE4.1 blends are destructive (output aliases the first operand) and
// implicitly use xmm0 as the mask, so pin the operands accordingly.
selector->Emit(
opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(1)),
g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(2), xmm0));
}
}
}  // namespace
// Per-shape entry points; all four shapes share the VisitSignSelect
// lowering and differ only in the arch opcode selected.
void InstructionSelector::VisitI8x16SignSelect(Node* node) {
VisitSignSelect(this, node, kX64I8x16SignSelect);
}
void InstructionSelector::VisitI16x8SignSelect(Node* node) {
VisitSignSelect(this, node, kX64I16x8SignSelect);
}
void InstructionSelector::VisitI32x4SignSelect(Node* node) {
VisitSignSelect(this, node, kX64I32x4SignSelect);
}
void InstructionSelector::VisitI64x2SignSelect(Node* node) {
VisitSignSelect(this, node, kX64I64x2SignSelect);
}
void InstructionSelector::VisitS128AndNot(Node* node) { void InstructionSelector::VisitS128AndNot(Node* node) {
X64OperandGenerator g(this); X64OperandGenerator g(this);
// andnps a b does ~a & b, but we want a & !b, so flip the input. // andnps a b does ~a & b, but we want a & !b, so flip the input.
......
...@@ -429,6 +429,7 @@ ShiftKind ShiftKindOf(Operator const* op) { ...@@ -429,6 +429,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I64x2ExtMulHighI32x4S, Operator::kCommutative, 2, 0, 1) \ V(I64x2ExtMulHighI32x4S, Operator::kCommutative, 2, 0, 1) \
V(I64x2ExtMulLowI32x4U, Operator::kCommutative, 2, 0, 1) \ V(I64x2ExtMulLowI32x4U, Operator::kCommutative, 2, 0, 1) \
V(I64x2ExtMulHighI32x4U, Operator::kCommutative, 2, 0, 1) \ V(I64x2ExtMulHighI32x4U, Operator::kCommutative, 2, 0, 1) \
V(I64x2SignSelect, Operator::kNoProperties, 3, 0, 1) \
V(I32x4Splat, Operator::kNoProperties, 1, 0, 1) \ V(I32x4Splat, Operator::kNoProperties, 1, 0, 1) \
V(I32x4SConvertF32x4, Operator::kNoProperties, 1, 0, 1) \ V(I32x4SConvertF32x4, Operator::kNoProperties, 1, 0, 1) \
V(I32x4SConvertI16x8Low, Operator::kNoProperties, 1, 0, 1) \ V(I32x4SConvertI16x8Low, Operator::kNoProperties, 1, 0, 1) \
...@@ -461,6 +462,7 @@ ShiftKind ShiftKindOf(Operator const* op) { ...@@ -461,6 +462,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I32x4ExtMulHighI16x8S, Operator::kCommutative, 2, 0, 1) \ V(I32x4ExtMulHighI16x8S, Operator::kCommutative, 2, 0, 1) \
V(I32x4ExtMulLowI16x8U, Operator::kCommutative, 2, 0, 1) \ V(I32x4ExtMulLowI16x8U, Operator::kCommutative, 2, 0, 1) \
V(I32x4ExtMulHighI16x8U, Operator::kCommutative, 2, 0, 1) \ V(I32x4ExtMulHighI16x8U, Operator::kCommutative, 2, 0, 1) \
V(I32x4SignSelect, Operator::kNoProperties, 3, 0, 1) \
V(I16x8Splat, Operator::kNoProperties, 1, 0, 1) \ V(I16x8Splat, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1) \ V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1) \ V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1) \
...@@ -498,6 +500,7 @@ ShiftKind ShiftKindOf(Operator const* op) { ...@@ -498,6 +500,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I16x8ExtMulHighI8x16S, Operator::kCommutative, 2, 0, 1) \ V(I16x8ExtMulHighI8x16S, Operator::kCommutative, 2, 0, 1) \
V(I16x8ExtMulLowI8x16U, Operator::kCommutative, 2, 0, 1) \ V(I16x8ExtMulLowI8x16U, Operator::kCommutative, 2, 0, 1) \
V(I16x8ExtMulHighI8x16U, Operator::kCommutative, 2, 0, 1) \ V(I16x8ExtMulHighI8x16U, Operator::kCommutative, 2, 0, 1) \
V(I16x8SignSelect, Operator::kNoProperties, 3, 0, 1) \
V(I8x16Splat, Operator::kNoProperties, 1, 0, 1) \ V(I8x16Splat, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Neg, Operator::kNoProperties, 1, 0, 1) \ V(I8x16Neg, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Shl, Operator::kNoProperties, 2, 0, 1) \ V(I8x16Shl, Operator::kNoProperties, 2, 0, 1) \
...@@ -526,6 +529,7 @@ ShiftKind ShiftKindOf(Operator const* op) { ...@@ -526,6 +529,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I8x16Popcnt, Operator::kNoProperties, 1, 0, 1) \ V(I8x16Popcnt, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Abs, Operator::kNoProperties, 1, 0, 1) \ V(I8x16Abs, Operator::kNoProperties, 1, 0, 1) \
V(I8x16BitMask, Operator::kNoProperties, 1, 0, 1) \ V(I8x16BitMask, Operator::kNoProperties, 1, 0, 1) \
V(I8x16SignSelect, Operator::kNoProperties, 3, 0, 1) \
V(S128Load, Operator::kNoProperties, 2, 0, 1) \ V(S128Load, Operator::kNoProperties, 2, 0, 1) \
V(S128Store, Operator::kNoProperties, 3, 0, 1) \ V(S128Store, Operator::kNoProperties, 3, 0, 1) \
V(S128Zero, Operator::kNoProperties, 0, 0, 1) \ V(S128Zero, Operator::kNoProperties, 0, 0, 1) \
......
...@@ -675,6 +675,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -675,6 +675,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I64x2ExtMulHighI32x4S(); const Operator* I64x2ExtMulHighI32x4S();
const Operator* I64x2ExtMulLowI32x4U(); const Operator* I64x2ExtMulLowI32x4U();
const Operator* I64x2ExtMulHighI32x4U(); const Operator* I64x2ExtMulHighI32x4U();
const Operator* I64x2SignSelect();
const Operator* I32x4Splat(); const Operator* I32x4Splat();
const Operator* I32x4ExtractLane(int32_t); const Operator* I32x4ExtractLane(int32_t);
...@@ -711,6 +712,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -711,6 +712,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4ExtMulHighI16x8S(); const Operator* I32x4ExtMulHighI16x8S();
const Operator* I32x4ExtMulLowI16x8U(); const Operator* I32x4ExtMulLowI16x8U();
const Operator* I32x4ExtMulHighI16x8U(); const Operator* I32x4ExtMulHighI16x8U();
const Operator* I32x4SignSelect();
const Operator* I16x8Splat(); const Operator* I16x8Splat();
const Operator* I16x8ExtractLaneU(int32_t); const Operator* I16x8ExtractLaneU(int32_t);
...@@ -753,6 +755,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -753,6 +755,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I16x8ExtMulHighI8x16S(); const Operator* I16x8ExtMulHighI8x16S();
const Operator* I16x8ExtMulLowI8x16U(); const Operator* I16x8ExtMulLowI8x16U();
const Operator* I16x8ExtMulHighI8x16U(); const Operator* I16x8ExtMulHighI8x16U();
const Operator* I16x8SignSelect();
const Operator* I8x16Splat(); const Operator* I8x16Splat();
const Operator* I8x16ExtractLaneU(int32_t); const Operator* I8x16ExtractLaneU(int32_t);
...@@ -786,6 +789,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -786,6 +789,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I8x16Popcnt(); const Operator* I8x16Popcnt();
const Operator* I8x16Abs(); const Operator* I8x16Abs();
const Operator* I8x16BitMask(); const Operator* I8x16BitMask();
const Operator* I8x16SignSelect();
const Operator* S128Load(); const Operator* S128Load();
const Operator* S128Store(); const Operator* S128Store();
......
...@@ -835,6 +835,7 @@ ...@@ -835,6 +835,7 @@
V(I64x2ExtMulHighI32x4S) \ V(I64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U) \ V(I64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U) \ V(I64x2ExtMulHighI32x4U) \
V(I64x2SignSelect) \
V(I32x4Splat) \ V(I32x4Splat) \
V(I32x4ExtractLane) \ V(I32x4ExtractLane) \
V(I32x4ReplaceLane) \ V(I32x4ReplaceLane) \
...@@ -873,6 +874,7 @@ ...@@ -873,6 +874,7 @@
V(I32x4ExtMulHighI16x8S) \ V(I32x4ExtMulHighI16x8S) \
V(I32x4ExtMulLowI16x8U) \ V(I32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U) \ V(I32x4ExtMulHighI16x8U) \
V(I32x4SignSelect) \
V(I16x8Splat) \ V(I16x8Splat) \
V(I16x8ExtractLaneU) \ V(I16x8ExtractLaneU) \
V(I16x8ExtractLaneS) \ V(I16x8ExtractLaneS) \
...@@ -917,6 +919,7 @@ ...@@ -917,6 +919,7 @@
V(I16x8ExtMulHighI8x16S) \ V(I16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U) \ V(I16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U) \ V(I16x8ExtMulHighI8x16U) \
V(I16x8SignSelect) \
V(I8x16Splat) \ V(I8x16Splat) \
V(I8x16ExtractLaneU) \ V(I8x16ExtractLaneU) \
V(I8x16ExtractLaneS) \ V(I8x16ExtractLaneS) \
...@@ -952,6 +955,7 @@ ...@@ -952,6 +955,7 @@
V(I8x16Popcnt) \ V(I8x16Popcnt) \
V(I8x16Abs) \ V(I8x16Abs) \
V(I8x16BitMask) \ V(I8x16BitMask) \
V(I8x16SignSelect) \
V(S128Load) \ V(S128Load) \
V(S128Store) \ V(S128Store) \
V(S128Zero) \ V(S128Zero) \
......
...@@ -4681,6 +4681,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) { ...@@ -4681,6 +4681,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI64x2ExtMulHighI32x4U: case wasm::kExprI64x2ExtMulHighI32x4U:
return graph()->NewNode(mcgraph()->machine()->I64x2ExtMulHighI32x4U(), return graph()->NewNode(mcgraph()->machine()->I64x2ExtMulHighI32x4U(),
inputs[0], inputs[1]); inputs[0], inputs[1]);
case wasm::kExprI64x2SignSelect:
return graph()->NewNode(mcgraph()->machine()->I64x2SignSelect(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprI32x4Splat: case wasm::kExprI32x4Splat:
return graph()->NewNode(mcgraph()->machine()->I32x4Splat(), inputs[0]); return graph()->NewNode(mcgraph()->machine()->I32x4Splat(), inputs[0]);
case wasm::kExprI32x4SConvertF32x4: case wasm::kExprI32x4SConvertF32x4:
...@@ -4785,6 +4788,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) { ...@@ -4785,6 +4788,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI32x4ExtMulHighI16x8U: case wasm::kExprI32x4ExtMulHighI16x8U:
return graph()->NewNode(mcgraph()->machine()->I32x4ExtMulHighI16x8U(), return graph()->NewNode(mcgraph()->machine()->I32x4ExtMulHighI16x8U(),
inputs[0], inputs[1]); inputs[0], inputs[1]);
case wasm::kExprI32x4SignSelect:
return graph()->NewNode(mcgraph()->machine()->I32x4SignSelect(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprI16x8Splat: case wasm::kExprI16x8Splat:
return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]); return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]);
case wasm::kExprI16x8SConvertI8x16Low: case wasm::kExprI16x8SConvertI8x16Low:
...@@ -4904,6 +4910,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) { ...@@ -4904,6 +4910,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI16x8ExtMulHighI8x16U: case wasm::kExprI16x8ExtMulHighI8x16U:
return graph()->NewNode(mcgraph()->machine()->I16x8ExtMulHighI8x16U(), return graph()->NewNode(mcgraph()->machine()->I16x8ExtMulHighI8x16U(),
inputs[0], inputs[1]); inputs[0], inputs[1]);
case wasm::kExprI16x8SignSelect:
return graph()->NewNode(mcgraph()->machine()->I16x8SignSelect(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprI8x16Splat: case wasm::kExprI8x16Splat:
return graph()->NewNode(mcgraph()->machine()->I8x16Splat(), inputs[0]); return graph()->NewNode(mcgraph()->machine()->I8x16Splat(), inputs[0]);
case wasm::kExprI8x16Neg: case wasm::kExprI8x16Neg:
...@@ -4995,6 +5004,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) { ...@@ -4995,6 +5004,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return graph()->NewNode(mcgraph()->machine()->I8x16Abs(), inputs[0]); return graph()->NewNode(mcgraph()->machine()->I8x16Abs(), inputs[0]);
case wasm::kExprI8x16BitMask: case wasm::kExprI8x16BitMask:
return graph()->NewNode(mcgraph()->machine()->I8x16BitMask(), inputs[0]); return graph()->NewNode(mcgraph()->machine()->I8x16BitMask(), inputs[0]);
case wasm::kExprI8x16SignSelect:
return graph()->NewNode(mcgraph()->machine()->I8x16SignSelect(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprS128And: case wasm::kExprS128And:
return graph()->NewNode(mcgraph()->machine()->S128And(), inputs[0], return graph()->NewNode(mcgraph()->machine()->S128And(), inputs[0],
inputs[1]); inputs[1]);
......
...@@ -1047,6 +1047,13 @@ int DisassemblerX64::AVXInstruction(byte* data) { ...@@ -1047,6 +1047,13 @@ int DisassemblerX64::AVXInstruction(byte* data) {
current += PrintRightOperand(current); current += PrintRightOperand(current);
AppendToBuffer(",0x%x", *current++); AppendToBuffer(",0x%x", *current++);
break; break;
case 0x4A: {
AppendToBuffer("vblendvps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister((*current++) >> 4));
break;
}
case 0x4B: { case 0x4B: {
AppendToBuffer("vblendvpd %s,%s,", NameOfXMMRegister(regop), AppendToBuffer("vblendvpd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv)); NameOfXMMRegister(vvvv));
...@@ -1054,6 +1061,13 @@ int DisassemblerX64::AVXInstruction(byte* data) { ...@@ -1054,6 +1061,13 @@ int DisassemblerX64::AVXInstruction(byte* data) {
AppendToBuffer(",%s", NameOfXMMRegister((*current++) >> 4)); AppendToBuffer(",%s", NameOfXMMRegister((*current++) >> 4));
break; break;
} }
case 0x4C: {
AppendToBuffer("vpblendvb %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister((*current++) >> 4));
break;
}
default: default:
UnimplementedInstruction(); UnimplementedInstruction();
} }
...@@ -2353,6 +2367,18 @@ int DisassemblerX64::ThreeByteOpcodeInstruction(byte* data) { ...@@ -2353,6 +2367,18 @@ int DisassemblerX64::ThreeByteOpcodeInstruction(byte* data) {
get_modrm(*current, &mod, &regop, &rm); get_modrm(*current, &mod, &regop, &rm);
if (second_byte == 0x38) { if (second_byte == 0x38) {
switch (third_byte) { switch (third_byte) {
case 0x10: {
AppendToBuffer("pblendvb %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
AppendToBuffer(",<xmm0>");
break;
}
case 0x14: {
AppendToBuffer("blendvps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
AppendToBuffer(",<xmm0>");
break;
}
case 0x15: { case 0x15: {
current += PrintOperands("blendvpd", XMMREG_XMMOPER_OP_ORDER, current); current += PrintOperands("blendvpd", XMMREG_XMMOPER_OP_ORDER, current);
AppendToBuffer(",<xmm0>"); AppendToBuffer(",<xmm0>");
......
...@@ -354,6 +354,8 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { ...@@ -354,6 +354,8 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIGN_OP(I32x4, ExtMulHighI16x8, "extmul_high_i16x8") CASE_SIGN_OP(I32x4, ExtMulHighI16x8, "extmul_high_i16x8")
CASE_SIGN_OP(I64x2, ExtMulLowI32x4, "extmul_low_i32x4") CASE_SIGN_OP(I64x2, ExtMulLowI32x4, "extmul_low_i32x4")
CASE_SIGN_OP(I64x2, ExtMulHighI32x4, "extmul_high_i32x4") CASE_SIGN_OP(I64x2, ExtMulHighI32x4, "extmul_high_i32x4")
CASE_SIMDI_OP(SignSelect, "signselect")
CASE_I64x2_OP(SignSelect, "signselect")
// Atomic operations. // Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify") CASE_OP(AtomicNotify, "atomic.notify")
......
...@@ -474,6 +474,10 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig, ...@@ -474,6 +474,10 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \ #define FOREACH_SIMD_POST_MVP_OPCODE(V) \
V(I8x16Mul, 0xfd75, s_ss) \ V(I8x16Mul, 0xfd75, s_ss) \
V(I8x16Popcnt, 0xfd7c, s_s) \ V(I8x16Popcnt, 0xfd7c, s_s) \
V(I8x16SignSelect, 0xfd7d, s_sss) \
V(I16x8SignSelect, 0xfd7e, s_sss) \
V(I32x4SignSelect, 0xfd7f, s_sss) \
V(I64x2SignSelect, 0xfd94, s_sss) \
V(I16x8Q15MulRSatS, 0xfd9c, s_ss) \ V(I16x8Q15MulRSatS, 0xfd9c, s_ss) \
V(I16x8ExtMulLowI8x16S, 0xfd9a, s_ss) \ V(I16x8ExtMulLowI8x16S, 0xfd9a, s_ss) \
V(I16x8ExtMulHighI8x16S, 0xfd9d, s_ss) \ V(I16x8ExtMulHighI8x16S, 0xfd9d, s_ss) \
......
...@@ -590,6 +590,10 @@ TEST(DisasmX64) { ...@@ -590,6 +590,10 @@ TEST(DisasmX64) {
__ cvtps2dq(xmm5, Operand(rdx, 4)); __ cvtps2dq(xmm5, Operand(rdx, 4));
__ cvtdq2ps(xmm5, xmm1); __ cvtdq2ps(xmm5, xmm1);
__ cvtdq2ps(xmm5, Operand(rdx, 4)); __ cvtdq2ps(xmm5, Operand(rdx, 4));
__ pblendvb(xmm5, xmm1);
__ blendvps(xmm5, xmm1);
__ blendvps(xmm5, Operand(rdx, 4));
__ blendvpd(xmm5, xmm1); __ blendvpd(xmm5, xmm1);
__ blendvpd(xmm5, Operand(rdx, 4)); __ blendvpd(xmm5, Operand(rdx, 4));
...@@ -829,6 +833,8 @@ TEST(DisasmX64) { ...@@ -829,6 +833,8 @@ TEST(DisasmX64) {
__ vpalignr(xmm1, xmm2, xmm3, 4); __ vpalignr(xmm1, xmm2, xmm3, 4);
__ vpalignr(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 4); __ vpalignr(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 4);
__ vpblendvb(xmm1, xmm2, xmm3, xmm4);
__ vblendvps(xmm1, xmm2, xmm3, xmm4);
__ vblendvpd(xmm1, xmm2, xmm3, xmm4); __ vblendvpd(xmm1, xmm2, xmm3, xmm4);
__ vmovddup(xmm1, xmm2); __ vmovddup(xmm1, xmm2);
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "src/base/bits.h" #include "src/base/bits.h"
#include "src/base/overflowing-math.h" #include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h" #include "src/codegen/assembler-inl.h"
#include "src/common/globals.h"
#include "src/wasm/wasm-opcodes.h" #include "src/wasm/wasm-opcodes.h"
#include "test/cctest/cctest.h" #include "test/cctest/cctest.h"
#include "test/cctest/compiler/value-helper.h" #include "test/cctest/compiler/value-helper.h"
...@@ -792,6 +793,65 @@ WASM_SIMD_TEST(F32x4Le) { ...@@ -792,6 +793,65 @@ WASM_SIMD_TEST(F32x4Le) {
RunF32x4CompareOpTest(execution_tier, lower_simd, kExprF32x4Le, LessEqual); RunF32x4CompareOpTest(execution_tier, lower_simd, kExprF32x4Le, LessEqual);
} }
#if V8_TARGET_ARCH_X64
// TODO(v8:10983) Prototyping sign select.
// Runs one signselect test: splats the two scalar call arguments into v128
// operands, applies |signselect| with the constant |mask|, stores the
// result to a global, and checks each lane. The masks passed by the
// callers below set the sign bit on even lanes only, so even lanes must
// take the first operand (1) and odd lanes the second (2).
template <typename T>
void RunSignSelect(TestExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode signselect, WasmOpcode splat,
std::array<int8_t, kSimd128Size> mask) {
// Sign select is still post-MVP; enable the flag for the module build.
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, T, T> r(execution_tier, lower_simd);
T* output = r.builder().template AddGlobal<T>(kWasmS128);
// Splat 2 constant values, then use a mask that selects alternate lanes.
BUILD(r, WASM_GET_LOCAL(0), WASM_SIMD_OP(splat), WASM_GET_LOCAL(1),
WASM_SIMD_OP(splat), WASM_SIMD_CONSTANT(mask), WASM_SIMD_OP(signselect),
kExprGlobalSet, 0, WASM_ONE);
r.Call(1, 2);
constexpr int lanes = kSimd128Size / sizeof(T);
// Even lanes: mask sign bit set -> first operand.
for (int i = 0; i < lanes; i += 2) {
CHECK_EQ(1, ReadLittleEndianValue<T>(&output[i]));
}
// Odd lanes: mask sign bit clear -> second operand.
for (int i = 1; i < lanes; i += 2) {
CHECK_EQ(2, ReadLittleEndianValue<T>(&output[i]));
}
}
// i8x16: byte mask alternates sign-bit-set (0x80 / -1) and zero lanes.
WASM_SIMD_TEST_NO_LOWERING(I8x16SignSelect) {
std::array<int8_t, kSimd128Size> mask = {0x80, 0, -1, 0, 0x80, 0, -1, 0,
0x80, 0, -1, 0, 0x80, 0, -1, 0};
RunSignSelect<int8_t>(execution_tier, lower_simd, kExprI8x16SignSelect,
kExprI8x16Splat, mask);
}
// i16x8: build the byte mask from 16-bit lane values; memcpy preserves the
// little-endian byte layout expected by WASM_SIMD_CONSTANT.
WASM_SIMD_TEST_NO_LOWERING(I16x8SignSelect) {
std::array<int16_t, kSimd128Size / 2> selection = {0x8000, 0, -1, 0,
0x8000, 0, -1, 0};
std::array<int8_t, kSimd128Size> mask;
memcpy(mask.data(), selection.data(), kSimd128Size);
RunSignSelect<int16_t>(execution_tier, lower_simd, kExprI16x8SignSelect,
kExprI16x8Splat, mask);
}
// i32x4: same alternating pattern with 32-bit lanes.
WASM_SIMD_TEST_NO_LOWERING(I32x4SignSelect) {
std::array<int32_t, kSimd128Size / 4> selection = {0x80000000, 0, -1, 0};
std::array<int8_t, kSimd128Size> mask;
memcpy(mask.data(), selection.data(), kSimd128Size);
RunSignSelect<int32_t>(execution_tier, lower_simd, kExprI32x4SignSelect,
kExprI32x4Splat, mask);
}
// i64x2: only two lanes, so the pattern is {negative, zero}.
WASM_SIMD_TEST_NO_LOWERING(I64x2SignSelect) {
std::array<int64_t, kSimd128Size / 8> selection = {0x8000000000000000, 0};
std::array<int8_t, kSimd128Size> mask;
memcpy(mask.data(), selection.data(), kSimd128Size);
RunSignSelect<int64_t>(execution_tier, lower_simd, kExprI64x2SignSelect,
kExprI64x2Splat, mask);
}
#endif // V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X
WASM_SIMD_TEST_NO_LOWERING(F32x4Qfma) { WASM_SIMD_TEST_NO_LOWERING(F32x4Qfma) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "src/base/overflowing-math.h" #include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h" #include "src/codegen/assembler-inl.h"
#include "src/common/globals.h"
#include "src/compiler/wasm-compiler.h" #include "src/compiler/wasm-compiler.h"
#include "src/numbers/conversions.h" #include "src/numbers/conversions.h"
#include "src/objects/objects-inl.h" #include "src/objects/objects-inl.h"
...@@ -2764,6 +2765,18 @@ class WasmInterpreterInternals { ...@@ -2764,6 +2765,18 @@ class WasmInterpreterInternals {
return DoSimdStoreLane<int2, int64_t, int64_t>( return DoSimdStoreLane<int2, int64_t, int64_t>(
decoder, code, pc, len, MachineRepresentation::kWord64); decoder, code, pc, len, MachineRepresentation::kWord64);
} }
case kExprI8x16SignSelect: {
return DoSimdSignSelect<int16>();
}
case kExprI16x8SignSelect: {
return DoSimdSignSelect<int8>();
}
case kExprI32x4SignSelect: {
return DoSimdSignSelect<int4>();
}
case kExprI64x2SignSelect: {
return DoSimdSignSelect<int2>();
}
default: default:
return false; return false;
} }
...@@ -2884,6 +2897,21 @@ class WasmInterpreterInternals { ...@@ -2884,6 +2897,21 @@ class WasmInterpreterInternals {
return true; return true;
} }
// Interpreter implementation of signselect: for each lane, the result is
// the first operand where the mask lane is negative (sign bit set), and
// the second operand otherwise. Returns true to signal the opcode was
// handled.
template <typename s_type>
bool DoSimdSignSelect() {
constexpr int lanes = kSimd128Size / sizeof(s_type::val[0]);
// Stack order: v1, v2, mask were pushed in that order, so the mask (c)
// is popped first.
auto c = Pop().to_s128().to<s_type>();
auto v2 = Pop().to_s128().to<s_type>();
auto v1 = Pop().to_s128().to<s_type>();
s_type res;
for (int i = 0; i < lanes; ++i) {
res.val[LANE(i, res)] =
c.val[LANE(i, c)] < 0 ? v1.val[LANE(i, v1)] : v2.val[LANE(i, v2)];
}
Push(WasmValue(Simd128(res)));
return true;
}
// Check if our control stack (frames_) exceeds the limit. Trigger stack // Check if our control stack (frames_) exceeds the limit. Trigger stack
// overflow if it does, and unwinding the current frame. // overflow if it does, and unwinding the current frame.
// Returns true if execution can continue, false if the stack was fully // Returns true if execution can continue, false if the stack was fully
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment