Commit 08ccfb20 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32] Prototype i32x4.dot_i16x8_s

This implements I32x4DotI16x8S for ia32.

Also fixes instruction-selector for SIMD ops, they should all set operand1 to be a register, since we do not have memory alignment yet.

Bug: v8:10583
Change-Id: Id273816efd5eea128580f3f7bde533a8e1b2435d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2231031
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68444}
parent 1c39569e
...@@ -385,6 +385,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -385,6 +385,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Psrlq, psrlq) AVX_PACKED_OP3(Psrlq, psrlq)
AVX_PACKED_OP3(Psraw, psraw) AVX_PACKED_OP3(Psraw, psraw)
AVX_PACKED_OP3(Psrad, psrad) AVX_PACKED_OP3(Psrad, psrad)
AVX_PACKED_OP3(Pmaddwd, pmaddwd)
AVX_PACKED_OP3(Paddd, paddd) AVX_PACKED_OP3(Paddd, paddd)
AVX_PACKED_OP3(Paddq, paddq) AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubq, psubq) AVX_PACKED_OP3(Psubq, psubq)
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
V(packsswb, 66, 0F, 63) \ V(packsswb, 66, 0F, 63) \
V(packssdw, 66, 0F, 6B) \ V(packssdw, 66, 0F, 6B) \
V(packuswb, 66, 0F, 67) \ V(packuswb, 66, 0F, 67) \
V(pmaddwd, 66, 0F, F5) \
V(paddb, 66, 0F, FC) \ V(paddb, 66, 0F, FC) \
V(paddw, 66, 0F, FD) \ V(paddw, 66, 0F, FD) \
V(paddd, 66, 0F, FE) \ V(paddd, 66, 0F, FE) \
......
...@@ -2807,6 +2807,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2807,6 +2807,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Movmskps(i.OutputRegister(), i.InputSimd128Register(0)); __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
break; break;
} }
case kIA32I32x4DotI16x8S: {
__ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kIA32I16x8Splat: { case kIA32I16x8Splat: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0)); __ Movd(dst, i.InputOperand(0));
......
...@@ -234,6 +234,7 @@ namespace compiler { ...@@ -234,6 +234,7 @@ namespace compiler {
V(AVXI32x4GeU) \ V(AVXI32x4GeU) \
V(IA32I32x4Abs) \ V(IA32I32x4Abs) \
V(IA32I32x4BitMask) \ V(IA32I32x4BitMask) \
V(IA32I32x4DotI16x8S) \
V(IA32I16x8Splat) \ V(IA32I16x8Splat) \
V(IA32I16x8ExtractLaneU) \ V(IA32I16x8ExtractLaneU) \
V(IA32I16x8ExtractLaneS) \ V(IA32I16x8ExtractLaneS) \
......
...@@ -215,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -215,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4GeU: case kAVXI32x4GeU:
case kIA32I32x4Abs: case kIA32I32x4Abs:
case kIA32I32x4BitMask: case kIA32I32x4BitMask:
case kIA32I32x4DotI16x8S:
case kIA32I16x8Splat: case kIA32I16x8Splat:
case kIA32I16x8ExtractLaneU: case kIA32I16x8ExtractLaneU:
case kIA32I16x8ExtractLaneS: case kIA32I16x8ExtractLaneS:
......
...@@ -277,6 +277,23 @@ void VisitRRSimd(InstructionSelector* selector, Node* node, ...@@ -277,6 +277,23 @@ void VisitRRSimd(InstructionSelector* selector, Node* node,
} }
} }
// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
// a register as we don't have memory alignment yet. For AVX, memory operands
// are fine, but can have performance issues if not aligned to 16/32 bytes
// (based on load size), see SDM Vol 1, chapter 14.9
void VisitRROSimd(InstructionSelector* selector, Node* node,
                  ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
  IA32OperandGenerator g(selector);
  // First input is always forced into a register, regardless of ISA level.
  InstructionOperand input0 = g.UseRegister(node->InputAt(0));
  Node* right = node->InputAt(1);
  if (!selector->IsSupported(AVX)) {
    // SSE two-operand form: destination aliases the first input, and the
    // second input must also be a register (no unaligned memory operands).
    selector->Emit(sse_opcode, g.DefineSameAsFirst(node), input0,
                   g.UseRegister(right));
    return;
  }
  // AVX three-operand form: a fresh destination register, and the second
  // input may be any operand (memory is allowed, though possibly slower
  // when unaligned — see the note above).
  selector->Emit(avx_opcode, g.DefineAsRegister(node), input0, g.Use(right));
}
void VisitRRISimd(InstructionSelector* selector, Node* node, void VisitRRISimd(InstructionSelector* selector, Node* node,
ArchOpcode opcode) { ArchOpcode opcode) {
IA32OperandGenerator g(selector); IA32OperandGenerator g(selector);
...@@ -2109,6 +2126,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -2109,6 +2126,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \ #define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
V(I64x2Add) \ V(I64x2Add) \
V(I64x2Sub) \ V(I64x2Sub) \
V(I32x4DotI16x8S) \
V(I16x8RoundingAverageU) \ V(I16x8RoundingAverageU) \
V(I8x16RoundingAverageU) V(I8x16RoundingAverageU)
...@@ -2422,17 +2440,17 @@ SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE) ...@@ -2422,17 +2440,17 @@ SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
#undef VISIT_SIMD_ALLTRUE #undef VISIT_SIMD_ALLTRUE
#undef SIMD_ALLTRUE_LIST #undef SIMD_ALLTRUE_LIST
#define VISIT_SIMD_BINOP(Opcode) \ #define VISIT_SIMD_BINOP(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \ void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \ VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \
} }
SIMD_BINOP_LIST(VISIT_SIMD_BINOP) SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
#undef VISIT_SIMD_BINOP #undef VISIT_SIMD_BINOP
#undef SIMD_BINOP_LIST #undef SIMD_BINOP_LIST
#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \ #define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \ void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROFloat(this, node, kIA32##Opcode, kIA32##Opcode); \ VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \
} }
SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX) SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX #undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
......
...@@ -2705,10 +2705,10 @@ void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); } ...@@ -2705,10 +2705,10 @@ void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
// && !V8_TARGET_ARCH_IA32 // && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s // TODO(v8:10583) Prototype i32x4.dot_i16x8_s
void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
...@@ -2317,7 +2317,7 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) { ...@@ -2317,7 +2317,7 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
} }
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s // TODO(v8:10583) Prototype i32x4.dot_i16x8_s
#if V8_TARGET_ARCH_X64 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) { WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
...@@ -2344,7 +2344,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) { ...@@ -2344,7 +2344,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4DotI16x8S) {
} }
} }
} }
#endif // V8_TARGET_ARCH_X64 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunI16x8ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int16ShiftOp expected_op) { WasmOpcode opcode, Int16ShiftOp expected_op) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment