Commit b0d79120 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][x64] Prototype sign select

Prototype i8x16, i16x8, i32x4, i64x2 sign select on x64 and interpreter.

Bug: v8:10983
Change-Id: I7d6f39a2cb4c2aefe31daac782978fe8b363dd1a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2486235
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70818}
parent fd12dfb9
......@@ -1066,6 +1066,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
#undef DECLARE_SSE4_INSTRUCTION
......@@ -1126,6 +1128,20 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_UNOP_AVX_INSTRUCTION)
#undef DECLARE_SSSE3_UNOP_AVX_INSTRUCTION
// AVX byte blend: emits VPBLENDVB (VEX.66.0F3A.W0 4C /r /is4).
// Selects bytes from src2 where the corresponding mask byte's sign bit is
// set, from src1 otherwise, writing the result to dst.
void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask) {
vinstr(0x4C, dst, src1, src2, k66, k0F3A, kW0);
// The mask operand is encoded in bits[7:4] of the immediate byte.
emit(mask.code() << 4);
}
// AVX dword blend: emits VBLENDVPS (VEX.66.0F3A.W0 4A /r /is4).
// Selects 32-bit lanes from src2 where the corresponding mask lane's sign
// bit is set, from src1 otherwise, writing the result to dst.
void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask) {
vinstr(0x4A, dst, src1, src2, k66, k0F3A, kW0);
// The mask operand is encoded in bits[7:4] of the immediate byte.
emit(mask.code() << 4);
}
void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask) {
vinstr(0x4B, dst, src1, src2, k66, k0F3A, kW0);
......
......@@ -1880,6 +1880,42 @@ void TurboAssembler::Pslld(XMMRegister dst, byte imm8) {
}
}
// Byte blend with AVX/SSE4.1 dispatch. With AVX this is fully flexible; the
// legacy SSE4.1 encoding reads the mask implicitly from xmm0 and blends into
// its destination in place, so callers must already satisfy dst == src1 and
// mask == xmm0 on the non-AVX path.
void TurboAssembler::Pblendvb(XMMRegister dst, XMMRegister src1,
                              XMMRegister src2, XMMRegister mask) {
  if (!CpuFeatures::IsSupported(AVX)) {
    DCHECK_EQ(dst, src1);
    DCHECK_EQ(xmm0, mask);
    pblendvb(dst, src2);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpblendvb(dst, src1, src2, mask);
}
// Float (32-bit lane) blend with AVX/SSE4.1 dispatch. The legacy SSE4.1
// encoding reads the mask implicitly from xmm0 and overwrites its first
// operand, so the non-AVX path requires dst == src1 and mask == xmm0.
void TurboAssembler::Blendvps(XMMRegister dst, XMMRegister src1,
                              XMMRegister src2, XMMRegister mask) {
  if (!CpuFeatures::IsSupported(AVX)) {
    DCHECK_EQ(dst, src1);
    DCHECK_EQ(xmm0, mask);
    blendvps(dst, src2);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vblendvps(dst, src1, src2, mask);
}
// Double (64-bit lane) blend with AVX/SSE4.1 dispatch. The legacy SSE4.1
// encoding reads the mask implicitly from xmm0 and overwrites its first
// operand, so the non-AVX path requires dst == src1 and mask == xmm0.
void TurboAssembler::Blendvpd(XMMRegister dst, XMMRegister src1,
                              XMMRegister src2, XMMRegister mask) {
  if (!CpuFeatures::IsSupported(AVX)) {
    DCHECK_EQ(dst, src1);
    DCHECK_EQ(xmm0, mask);
    blendvpd(dst, src2);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vblendvpd(dst, src1, src2, mask);
}
void TurboAssembler::Psrld(XMMRegister dst, byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
......
......@@ -540,6 +540,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Pslld(XMMRegister dst, byte imm8);
void Psrld(XMMRegister dst, byte imm8);
void Pblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask);
void Blendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask);
void Blendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister mask);
void CompareRoot(Register with, RootIndex index);
void CompareRoot(Operand with, RootIndex index);
......
......@@ -2023,6 +2023,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI64x2ExtMulLowI32x4U(node);
case IrOpcode::kI64x2ExtMulHighI32x4U:
return MarkAsSimd128(node), VisitI64x2ExtMulHighI32x4U(node);
case IrOpcode::kI64x2SignSelect:
return MarkAsSimd128(node), VisitI64x2SignSelect(node);
case IrOpcode::kI32x4Splat:
return MarkAsSimd128(node), VisitI32x4Splat(node);
case IrOpcode::kI32x4ExtractLane:
......@@ -2091,6 +2093,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4ExtMulLowI16x8U(node);
case IrOpcode::kI32x4ExtMulHighI16x8U:
return MarkAsSimd128(node), VisitI32x4ExtMulHighI16x8U(node);
case IrOpcode::kI32x4SignSelect:
return MarkAsSimd128(node), VisitI32x4SignSelect(node);
case IrOpcode::kI16x8Splat:
return MarkAsSimd128(node), VisitI16x8Splat(node);
case IrOpcode::kI16x8ExtractLaneU:
......@@ -2171,6 +2175,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI16x8ExtMulLowI8x16U(node);
case IrOpcode::kI16x8ExtMulHighI8x16U:
return MarkAsSimd128(node), VisitI16x8ExtMulHighI8x16U(node);
case IrOpcode::kI16x8SignSelect:
return MarkAsSimd128(node), VisitI16x8SignSelect(node);
case IrOpcode::kI8x16Splat:
return MarkAsSimd128(node), VisitI8x16Splat(node);
case IrOpcode::kI8x16ExtractLaneU:
......@@ -2233,6 +2239,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI8x16Abs(node);
case IrOpcode::kI8x16BitMask:
return MarkAsWord32(node), VisitI8x16BitMask(node);
case IrOpcode::kI8x16SignSelect:
return MarkAsSimd128(node), VisitI8x16SignSelect(node);
case IrOpcode::kS128Const:
return MarkAsSimd128(node), VisitS128Const(node);
case IrOpcode::kS128Zero:
......@@ -2771,6 +2779,12 @@ void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:10997) Prototype i64x2.bitmask.
void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:10983) Prototyping sign select.
// Sign select is only implemented on x64 so far (see the !V8_TARGET_ARCH_X64
// guard); every other architecture hits these stubs.
void InstructionSelector::VisitI8x16SignSelect(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8SignSelect(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4SignSelect(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -3511,6 +3511,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
break;
}
case kX64I8x16SignSelect: {
__ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kX64I16x8SignSelect: {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpsraw(kScratchDoubleReg, i.InputSimd128Register(2), 15);
__ vpblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), kScratchDoubleReg);
} else {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister mask = i.InputSimd128Register(2);
DCHECK_EQ(xmm0, mask);
__ movapd(kScratchDoubleReg, mask);
__ pxor(mask, mask);
__ pcmpgtw(mask, kScratchDoubleReg);
__ pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(1));
// Restore mask.
__ movapd(mask, kScratchDoubleReg);
}
break;
}
case kX64I32x4SignSelect: {
__ Blendvps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kX64I64x2SignSelect: {
__ Blendvpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kX64S128And: {
ASSEMBLE_SIMD_BINOP(pand);
break;
......
......@@ -211,6 +211,7 @@ namespace compiler {
V(X64I64x2Mul) \
V(X64I64x2Eq) \
V(X64I64x2ShrU) \
V(X64I64x2SignSelect) \
V(X64I32x4Splat) \
V(X64I32x4ExtractLane) \
V(X64I32x4SConvertF32x4) \
......@@ -240,6 +241,7 @@ namespace compiler {
V(X64I32x4Abs) \
V(X64I32x4BitMask) \
V(X64I32x4DotI16x8S) \
V(X64I32x4SignSelect) \
V(X64I16x8Splat) \
V(X64I16x8ExtractLaneS) \
V(X64I16x8SConvertI8x16Low) \
......@@ -273,6 +275,7 @@ namespace compiler {
V(X64I16x8RoundingAverageU) \
V(X64I16x8Abs) \
V(X64I16x8BitMask) \
V(X64I16x8SignSelect) \
V(X64I8x16Splat) \
V(X64I8x16ExtractLaneS) \
V(X64Pinsrb) \
......@@ -307,6 +310,7 @@ namespace compiler {
V(X64I8x16RoundingAverageU) \
V(X64I8x16Abs) \
V(X64I8x16BitMask) \
V(X64I8x16SignSelect) \
V(X64S128Const) \
V(X64S128Zero) \
V(X64S128AllOnes) \
......
......@@ -187,6 +187,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I64x2Mul:
case kX64I64x2Eq:
case kX64I64x2ShrU:
case kX64I64x2SignSelect:
case kX64I32x4Splat:
case kX64I32x4ExtractLane:
case kX64I32x4SConvertF32x4:
......@@ -216,6 +217,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I32x4Abs:
case kX64I32x4BitMask:
case kX64I32x4DotI16x8S:
case kX64I32x4SignSelect:
case kX64I16x8Splat:
case kX64I16x8ExtractLaneS:
case kX64I16x8SConvertI8x16Low:
......@@ -249,6 +251,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I16x8RoundingAverageU:
case kX64I16x8Abs:
case kX64I16x8BitMask:
case kX64I16x8SignSelect:
case kX64I8x16Splat:
case kX64I8x16ExtractLaneS:
case kX64I8x16SConvertI16x8:
......@@ -277,6 +280,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I8x16RoundingAverageU:
case kX64I8x16Abs:
case kX64I8x16BitMask:
case kX64I8x16SignSelect:
case kX64S128And:
case kX64S128Or:
case kX64S128Xor:
......
......@@ -3139,6 +3139,40 @@ void InstructionSelector::VisitS128Select(Node* node) {
g.UseRegister(node->InputAt(2)));
}
namespace {
// Common lowering for every sign-select lane shape; only |opcode| differs.
// signselect(x, y, mask) yields x in lanes whose mask lane is negative, but
// the x64 blend instructions choose their *second* source when the mask bit
// is set (pblendvb(dst, x, y, -1) = dst <- y), so inputs 0 and 1 are emitted
// in swapped order.
void VisitSignSelect(InstructionSelector* selector, Node* node,
                     ArchOpcode opcode) {
  X64OperandGenerator g(selector);
  InstructionOperand blend_first = g.UseRegister(node->InputAt(1));
  InstructionOperand blend_second = g.UseRegister(node->InputAt(0));
  if (!selector->IsSupported(AVX)) {
    // The SSE4.1 blends read the mask implicitly from xmm0 and overwrite
    // their first source, hence the fixed register and same-as-first output.
    selector->Emit(opcode, g.DefineSameAsFirst(node), blend_first,
                   blend_second, g.UseFixed(node->InputAt(2), xmm0));
  } else {
    selector->Emit(opcode, g.DefineAsRegister(node), blend_first, blend_second,
                   g.UseRegister(node->InputAt(2)));
  }
}
}  // namespace
// The four lane shapes share the lowering in VisitSignSelect; each visitor
// only supplies its arch-specific opcode.
void InstructionSelector::VisitI8x16SignSelect(Node* node) {
VisitSignSelect(this, node, kX64I8x16SignSelect);
}
void InstructionSelector::VisitI16x8SignSelect(Node* node) {
VisitSignSelect(this, node, kX64I16x8SignSelect);
}
void InstructionSelector::VisitI32x4SignSelect(Node* node) {
VisitSignSelect(this, node, kX64I32x4SignSelect);
}
void InstructionSelector::VisitI64x2SignSelect(Node* node) {
VisitSignSelect(this, node, kX64I64x2SignSelect);
}
void InstructionSelector::VisitS128AndNot(Node* node) {
X64OperandGenerator g(this);
// andnps a b does ~a & b, but we want a & !b, so flip the input.
......
......@@ -429,6 +429,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I64x2ExtMulHighI32x4S, Operator::kCommutative, 2, 0, 1) \
V(I64x2ExtMulLowI32x4U, Operator::kCommutative, 2, 0, 1) \
V(I64x2ExtMulHighI32x4U, Operator::kCommutative, 2, 0, 1) \
V(I64x2SignSelect, Operator::kNoProperties, 3, 0, 1) \
V(I32x4Splat, Operator::kNoProperties, 1, 0, 1) \
V(I32x4SConvertF32x4, Operator::kNoProperties, 1, 0, 1) \
V(I32x4SConvertI16x8Low, Operator::kNoProperties, 1, 0, 1) \
......@@ -461,6 +462,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I32x4ExtMulHighI16x8S, Operator::kCommutative, 2, 0, 1) \
V(I32x4ExtMulLowI16x8U, Operator::kCommutative, 2, 0, 1) \
V(I32x4ExtMulHighI16x8U, Operator::kCommutative, 2, 0, 1) \
V(I32x4SignSelect, Operator::kNoProperties, 3, 0, 1) \
V(I16x8Splat, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1) \
......@@ -498,6 +500,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I16x8ExtMulHighI8x16S, Operator::kCommutative, 2, 0, 1) \
V(I16x8ExtMulLowI8x16U, Operator::kCommutative, 2, 0, 1) \
V(I16x8ExtMulHighI8x16U, Operator::kCommutative, 2, 0, 1) \
V(I16x8SignSelect, Operator::kNoProperties, 3, 0, 1) \
V(I8x16Splat, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Neg, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Shl, Operator::kNoProperties, 2, 0, 1) \
......@@ -526,6 +529,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I8x16Popcnt, Operator::kNoProperties, 1, 0, 1) \
V(I8x16Abs, Operator::kNoProperties, 1, 0, 1) \
V(I8x16BitMask, Operator::kNoProperties, 1, 0, 1) \
V(I8x16SignSelect, Operator::kNoProperties, 3, 0, 1) \
V(S128Load, Operator::kNoProperties, 2, 0, 1) \
V(S128Store, Operator::kNoProperties, 3, 0, 1) \
V(S128Zero, Operator::kNoProperties, 0, 0, 1) \
......
......@@ -675,6 +675,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I64x2ExtMulHighI32x4S();
const Operator* I64x2ExtMulLowI32x4U();
const Operator* I64x2ExtMulHighI32x4U();
const Operator* I64x2SignSelect();
const Operator* I32x4Splat();
const Operator* I32x4ExtractLane(int32_t);
......@@ -711,6 +712,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4ExtMulHighI16x8S();
const Operator* I32x4ExtMulLowI16x8U();
const Operator* I32x4ExtMulHighI16x8U();
const Operator* I32x4SignSelect();
const Operator* I16x8Splat();
const Operator* I16x8ExtractLaneU(int32_t);
......@@ -753,6 +755,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I16x8ExtMulHighI8x16S();
const Operator* I16x8ExtMulLowI8x16U();
const Operator* I16x8ExtMulHighI8x16U();
const Operator* I16x8SignSelect();
const Operator* I8x16Splat();
const Operator* I8x16ExtractLaneU(int32_t);
......@@ -786,6 +789,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I8x16Popcnt();
const Operator* I8x16Abs();
const Operator* I8x16BitMask();
const Operator* I8x16SignSelect();
const Operator* S128Load();
const Operator* S128Store();
......
......@@ -835,6 +835,7 @@
V(I64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U) \
V(I64x2SignSelect) \
V(I32x4Splat) \
V(I32x4ExtractLane) \
V(I32x4ReplaceLane) \
......@@ -873,6 +874,7 @@
V(I32x4ExtMulHighI16x8S) \
V(I32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U) \
V(I32x4SignSelect) \
V(I16x8Splat) \
V(I16x8ExtractLaneU) \
V(I16x8ExtractLaneS) \
......@@ -917,6 +919,7 @@
V(I16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U) \
V(I16x8SignSelect) \
V(I8x16Splat) \
V(I8x16ExtractLaneU) \
V(I8x16ExtractLaneS) \
......@@ -952,6 +955,7 @@
V(I8x16Popcnt) \
V(I8x16Abs) \
V(I8x16BitMask) \
V(I8x16SignSelect) \
V(S128Load) \
V(S128Store) \
V(S128Zero) \
......
......@@ -4681,6 +4681,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI64x2ExtMulHighI32x4U:
return graph()->NewNode(mcgraph()->machine()->I64x2ExtMulHighI32x4U(),
inputs[0], inputs[1]);
case wasm::kExprI64x2SignSelect:
return graph()->NewNode(mcgraph()->machine()->I64x2SignSelect(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprI32x4Splat:
return graph()->NewNode(mcgraph()->machine()->I32x4Splat(), inputs[0]);
case wasm::kExprI32x4SConvertF32x4:
......@@ -4785,6 +4788,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI32x4ExtMulHighI16x8U:
return graph()->NewNode(mcgraph()->machine()->I32x4ExtMulHighI16x8U(),
inputs[0], inputs[1]);
case wasm::kExprI32x4SignSelect:
return graph()->NewNode(mcgraph()->machine()->I32x4SignSelect(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprI16x8Splat:
return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]);
case wasm::kExprI16x8SConvertI8x16Low:
......@@ -4904,6 +4910,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI16x8ExtMulHighI8x16U:
return graph()->NewNode(mcgraph()->machine()->I16x8ExtMulHighI8x16U(),
inputs[0], inputs[1]);
case wasm::kExprI16x8SignSelect:
return graph()->NewNode(mcgraph()->machine()->I16x8SignSelect(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprI8x16Splat:
return graph()->NewNode(mcgraph()->machine()->I8x16Splat(), inputs[0]);
case wasm::kExprI8x16Neg:
......@@ -4995,6 +5004,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return graph()->NewNode(mcgraph()->machine()->I8x16Abs(), inputs[0]);
case wasm::kExprI8x16BitMask:
return graph()->NewNode(mcgraph()->machine()->I8x16BitMask(), inputs[0]);
case wasm::kExprI8x16SignSelect:
return graph()->NewNode(mcgraph()->machine()->I8x16SignSelect(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprS128And:
return graph()->NewNode(mcgraph()->machine()->S128And(), inputs[0],
inputs[1]);
......
......@@ -1047,6 +1047,13 @@ int DisassemblerX64::AVXInstruction(byte* data) {
current += PrintRightOperand(current);
AppendToBuffer(",0x%x", *current++);
break;
case 0x4A: {
AppendToBuffer("vblendvps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister((*current++) >> 4));
break;
}
case 0x4B: {
AppendToBuffer("vblendvpd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......@@ -1054,6 +1061,13 @@ int DisassemblerX64::AVXInstruction(byte* data) {
AppendToBuffer(",%s", NameOfXMMRegister((*current++) >> 4));
break;
}
case 0x4C: {
AppendToBuffer("vpblendvb %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister((*current++) >> 4));
break;
}
default:
UnimplementedInstruction();
}
......@@ -2353,6 +2367,18 @@ int DisassemblerX64::ThreeByteOpcodeInstruction(byte* data) {
get_modrm(*current, &mod, &regop, &rm);
if (second_byte == 0x38) {
switch (third_byte) {
case 0x10: {
AppendToBuffer("pblendvb %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
AppendToBuffer(",<xmm0>");
break;
}
case 0x14: {
AppendToBuffer("blendvps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
AppendToBuffer(",<xmm0>");
break;
}
case 0x15: {
current += PrintOperands("blendvpd", XMMREG_XMMOPER_OP_ORDER, current);
AppendToBuffer(",<xmm0>");
......
......@@ -354,6 +354,8 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIGN_OP(I32x4, ExtMulHighI16x8, "extmul_high_i16x8")
CASE_SIGN_OP(I64x2, ExtMulLowI32x4, "extmul_low_i32x4")
CASE_SIGN_OP(I64x2, ExtMulHighI32x4, "extmul_high_i32x4")
CASE_SIMDI_OP(SignSelect, "signselect")
CASE_I64x2_OP(SignSelect, "signselect")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
......
......@@ -474,6 +474,10 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \
V(I8x16Mul, 0xfd75, s_ss) \
V(I8x16Popcnt, 0xfd7c, s_s) \
V(I8x16SignSelect, 0xfd7d, s_sss) \
V(I16x8SignSelect, 0xfd7e, s_sss) \
V(I32x4SignSelect, 0xfd7f, s_sss) \
V(I64x2SignSelect, 0xfd94, s_sss) \
V(I16x8Q15MulRSatS, 0xfd9c, s_ss) \
V(I16x8ExtMulLowI8x16S, 0xfd9a, s_ss) \
V(I16x8ExtMulHighI8x16S, 0xfd9d, s_ss) \
......
......@@ -590,6 +590,10 @@ TEST(DisasmX64) {
__ cvtps2dq(xmm5, Operand(rdx, 4));
__ cvtdq2ps(xmm5, xmm1);
__ cvtdq2ps(xmm5, Operand(rdx, 4));
__ pblendvb(xmm5, xmm1);
__ blendvps(xmm5, xmm1);
__ blendvps(xmm5, Operand(rdx, 4));
__ blendvpd(xmm5, xmm1);
__ blendvpd(xmm5, Operand(rdx, 4));
......@@ -829,6 +833,8 @@ TEST(DisasmX64) {
__ vpalignr(xmm1, xmm2, xmm3, 4);
__ vpalignr(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 4);
__ vpblendvb(xmm1, xmm2, xmm3, xmm4);
__ vblendvps(xmm1, xmm2, xmm3, xmm4);
__ vblendvpd(xmm1, xmm2, xmm3, xmm4);
__ vmovddup(xmm1, xmm2);
......
......@@ -8,6 +8,7 @@
#include "src/base/bits.h"
#include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h"
#include "src/common/globals.h"
#include "src/wasm/wasm-opcodes.h"
#include "test/cctest/cctest.h"
#include "test/cctest/compiler/value-helper.h"
......@@ -792,6 +793,65 @@ WASM_SIMD_TEST(F32x4Le) {
RunF32x4CompareOpTest(execution_tier, lower_simd, kExprF32x4Le, LessEqual);
}
#if V8_TARGET_ARCH_X64
// TODO(v8:10983) Prototyping sign select.
// Builds and runs a wasm module that splats the two scalar call arguments
// (1 and 2) into vectors, applies |signselect| with |mask| as the control
// operand, and stores the result to a global, which is then checked lane by
// lane. NOTE(review): the CHECK loops hard-code that |mask| marks exactly the
// even-numbered lanes as negative (sign bit set) -- callers must pass masks
// with that alternating pattern.
template <typename T>
void RunSignSelect(TestExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode signselect, WasmOpcode splat,
std::array<int8_t, kSimd128Size> mask) {
// Sign select is a post-MVP SIMD opcode; enable it for this test's scope.
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, T, T> r(execution_tier, lower_simd);
T* output = r.builder().template AddGlobal<T>(kWasmS128);
// Splat 2 constant values, then use a mask that selects alternate lanes.
BUILD(r, WASM_GET_LOCAL(0), WASM_SIMD_OP(splat), WASM_GET_LOCAL(1),
WASM_SIMD_OP(splat), WASM_SIMD_CONSTANT(mask), WASM_SIMD_OP(signselect),
kExprGlobalSet, 0, WASM_ONE);
r.Call(1, 2);
constexpr int lanes = kSimd128Size / sizeof(T);
// Even lanes had a negative mask lane and took the first splat (1).
for (int i = 0; i < lanes; i += 2) {
CHECK_EQ(1, ReadLittleEndianValue<T>(&output[i]));
}
// Odd lanes had a non-negative mask lane and took the second splat (2).
for (int i = 1; i < lanes; i += 2) {
CHECK_EQ(2, ReadLittleEndianValue<T>(&output[i]));
}
}
WASM_SIMD_TEST_NO_LOWERING(I8x16SignSelect) {
  // Even lanes get a negative control value (0x80 or -1), odd lanes zero.
  // 0x80 (= 128) does not fit in int8_t, so list-initializing with the plain
  // int literal is an ill-formed narrowing conversion; cast it explicitly.
  constexpr int8_t kSign = static_cast<int8_t>(0x80);
  std::array<int8_t, kSimd128Size> mask = {kSign, 0, -1, 0, kSign, 0, -1, 0,
                                           kSign, 0, -1, 0, kSign, 0, -1, 0};
  RunSignSelect<int8_t>(execution_tier, lower_simd, kExprI8x16SignSelect,
                        kExprI8x16Splat, mask);
}
WASM_SIMD_TEST_NO_LOWERING(I16x8SignSelect) {
  // Even lanes get a negative control value (0x8000 or -1), odd lanes zero.
  // 0x8000 (= 32768) does not fit in int16_t, so the plain int literal would
  // be an ill-formed narrowing conversion in list-initialization; cast it.
  constexpr int16_t kSign = static_cast<int16_t>(0x8000);
  std::array<int16_t, kSimd128Size / 2> selection = {kSign, 0, -1, 0,
                                                     kSign, 0, -1, 0};
  std::array<int8_t, kSimd128Size> mask;
  memcpy(mask.data(), selection.data(), kSimd128Size);
  RunSignSelect<int16_t>(execution_tier, lower_simd, kExprI16x8SignSelect,
                         kExprI16x8Splat, mask);
}
WASM_SIMD_TEST_NO_LOWERING(I32x4SignSelect) {
  // Even lanes get a negative control value (0x80000000 or -1), odd lanes
  // zero. 0x80000000 is an unsigned literal exceeding INT32_MAX, so storing
  // it in an int32_t via list-initialization is an ill-formed narrowing
  // conversion; cast it explicitly.
  std::array<int32_t, kSimd128Size / 4> selection = {
      static_cast<int32_t>(0x80000000), 0, -1, 0};
  std::array<int8_t, kSimd128Size> mask;
  memcpy(mask.data(), selection.data(), kSimd128Size);
  RunSignSelect<int32_t>(execution_tier, lower_simd, kExprI32x4SignSelect,
                         kExprI32x4Splat, mask);
}
WASM_SIMD_TEST_NO_LOWERING(I64x2SignSelect) {
  // Lane 0 gets a negative control value (only the sign bit set), lane 1
  // zero. 0x8000000000000000 is an unsigned literal exceeding INT64_MAX, so
  // storing it in an int64_t via list-initialization is an ill-formed
  // narrowing conversion; cast it explicitly.
  std::array<int64_t, kSimd128Size / 8> selection = {
      static_cast<int64_t>(0x8000000000000000ULL), 0};
  std::array<int8_t, kSimd128Size> mask;
  memcpy(mask.data(), selection.data(), kSimd128Size);
  RunSignSelect<int64_t>(execution_tier, lower_simd, kExprI64x2SignSelect,
                         kExprI64x2Splat, mask);
}
#endif // V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X
WASM_SIMD_TEST_NO_LOWERING(F32x4Qfma) {
FLAG_SCOPE(wasm_simd_post_mvp);
......
......@@ -9,6 +9,7 @@
#include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h"
#include "src/common/globals.h"
#include "src/compiler/wasm-compiler.h"
#include "src/numbers/conversions.h"
#include "src/objects/objects-inl.h"
......@@ -2764,6 +2765,18 @@ class WasmInterpreterInternals {
return DoSimdStoreLane<int2, int64_t, int64_t>(
decoder, code, pc, len, MachineRepresentation::kWord64);
}
case kExprI8x16SignSelect: {
return DoSimdSignSelect<int16>();
}
case kExprI16x8SignSelect: {
return DoSimdSignSelect<int8>();
}
case kExprI32x4SignSelect: {
return DoSimdSignSelect<int4>();
}
case kExprI64x2SignSelect: {
return DoSimdSignSelect<int2>();
}
default:
return false;
}
......@@ -2884,6 +2897,21 @@ class WasmInterpreterInternals {
return true;
}
// Interpreter implementation of signselect(v1, v2, c): each result lane takes
// v1's lane when the control lane c is negative (sign bit set) and v2's lane
// otherwise. Operands are popped in reverse push order: control first, then
// v2, then v1.
template <typename s_type>
bool DoSimdSignSelect() {
  constexpr int kLanes = kSimd128Size / sizeof(s_type::val[0]);
  auto control = Pop().to_s128().to<s_type>();
  auto if_nonneg = Pop().to_s128().to<s_type>();
  auto if_neg = Pop().to_s128().to<s_type>();
  s_type result;
  for (int lane = 0; lane < kLanes; ++lane) {
    const bool sign_set = control.val[LANE(lane, control)] < 0;
    result.val[LANE(lane, result)] =
        sign_set ? if_neg.val[LANE(lane, if_neg)]
                 : if_nonneg.val[LANE(lane, if_nonneg)];
  }
  Push(WasmValue(Simd128(result)));
  return true;
}
// Check if our control stack (frames_) exceeds the limit. Trigger stack
// overflow if it does, and unwinding the current frame.
// Returns true if execution can continue, false if the stack was fully
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment