Commit 63eb1a89 authored by Ng Zhi An's avatar Ng Zhi An Committed by V8 LUCI CQ

[wasm-simd] Merge SSE/AVX I16x8UConvertI32x4 I8x16UConvertI16x8

No functionality change is expected.

Bug: v8:11217
Change-Id: I131d52794e4de24ec838cc23f15828edbfc656ff
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3131372
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76738}
parent e29acc3b
...@@ -332,6 +332,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase { ...@@ -332,6 +332,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP_SSSE3(Psignw, psignw) AVX_OP_SSSE3(Psignw, psignw)
AVX_OP_SSE4_1(Extractps, extractps) AVX_OP_SSE4_1(Extractps, extractps)
AVX_OP_SSE4_1(Packusdw, packusdw)
AVX_OP_SSE4_1(Pblendw, pblendw) AVX_OP_SSE4_1(Pblendw, pblendw)
AVX_OP_SSE4_1(Pextrb, pextrb) AVX_OP_SSE4_1(Pextrb, pextrb)
AVX_OP_SSE4_1(Pextrw, pextrw) AVX_OP_SSE4_1(Pextrw, pextrw)
......
...@@ -2700,17 +2700,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2700,17 +2700,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_SIMD_SHIFT(Psrlw, 4); ASSEMBLE_SIMD_SHIFT(Psrlw, 4);
break; break;
} }
case kSSEI16x8UConvertI32x4: { case kIA32I16x8UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Packusdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
CpuFeatureScope sse_scope(tasm(), SSE4_1); i.InputSimd128Register(1));
__ packusdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kAVXI16x8UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
__ vpackusdw(dst, dst, i.InputSimd128Register(1));
break; break;
} }
case kIA32I16x8AddSatU: { case kIA32I16x8AddSatU: {
...@@ -2988,18 +2980,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2988,18 +2980,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2); __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
break; break;
} }
case kSSEI8x16UConvertI16x8: { case kIA32I8x16UConvertI16x8: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Packuswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
CpuFeatureScope sse_scope(tasm(), SSE4_1); i.InputSimd128Register(1));
XMMRegister dst = i.OutputSimd128Register();
__ packuswb(dst, i.InputOperand(1));
break;
}
case kAVXI8x16UConvertI16x8: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
__ vpackuswb(dst, dst, i.InputOperand(1));
break; break;
} }
case kIA32I8x16AddSatU: { case kIA32I8x16AddSatU: {
......
...@@ -236,8 +236,7 @@ namespace compiler { ...@@ -236,8 +236,7 @@ namespace compiler {
V(IA32I16x8UConvertI8x16Low) \ V(IA32I16x8UConvertI8x16Low) \
V(IA32I16x8UConvertI8x16High) \ V(IA32I16x8UConvertI8x16High) \
V(IA32I16x8ShrU) \ V(IA32I16x8ShrU) \
V(SSEI16x8UConvertI32x4) \ V(IA32I16x8UConvertI32x4) \
V(AVXI16x8UConvertI32x4) \
V(IA32I16x8AddSatU) \ V(IA32I16x8AddSatU) \
V(IA32I16x8SubSatU) \ V(IA32I16x8SubSatU) \
V(IA32I16x8MinU) \ V(IA32I16x8MinU) \
...@@ -280,8 +279,7 @@ namespace compiler { ...@@ -280,8 +279,7 @@ namespace compiler {
V(IA32I8x16GtS) \ V(IA32I8x16GtS) \
V(SSEI8x16GeS) \ V(SSEI8x16GeS) \
V(AVXI8x16GeS) \ V(AVXI8x16GeS) \
V(SSEI8x16UConvertI16x8) \ V(IA32I8x16UConvertI16x8) \
V(AVXI8x16UConvertI16x8) \
V(IA32I8x16AddSatU) \ V(IA32I8x16AddSatU) \
V(IA32I8x16SubSatU) \ V(IA32I8x16SubSatU) \
V(IA32I8x16ShrU) \ V(IA32I8x16ShrU) \
......
...@@ -221,8 +221,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -221,8 +221,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I16x8UConvertI8x16Low: case kIA32I16x8UConvertI8x16Low:
case kIA32I16x8UConvertI8x16High: case kIA32I16x8UConvertI8x16High:
case kIA32I16x8ShrU: case kIA32I16x8ShrU:
case kSSEI16x8UConvertI32x4: case kIA32I16x8UConvertI32x4:
case kAVXI16x8UConvertI32x4:
case kIA32I16x8AddSatU: case kIA32I16x8AddSatU:
case kIA32I16x8SubSatU: case kIA32I16x8SubSatU:
case kIA32I16x8MinU: case kIA32I16x8MinU:
...@@ -265,8 +264,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -265,8 +264,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I8x16GtS: case kIA32I8x16GtS:
case kSSEI8x16GeS: case kSSEI8x16GeS:
case kAVXI8x16GeS: case kAVXI8x16GeS:
case kSSEI8x16UConvertI16x8: case kIA32I8x16UConvertI16x8:
case kAVXI8x16UConvertI16x8:
case kIA32I8x16AddSatU: case kIA32I8x16AddSatU:
case kIA32I8x16SubSatU: case kIA32I8x16SubSatU:
case kIA32I8x16ShrU: case kIA32I8x16ShrU:
......
...@@ -2299,6 +2299,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -2299,6 +2299,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8MinU) \ V(I16x8MinU) \
V(I16x8MaxU) \ V(I16x8MaxU) \
V(I16x8SConvertI32x4) \ V(I16x8SConvertI32x4) \
V(I16x8UConvertI32x4) \
V(I16x8RoundingAverageU) \ V(I16x8RoundingAverageU) \
V(I8x16Add) \ V(I8x16Add) \
V(I8x16AddSatS) \ V(I8x16AddSatS) \
...@@ -2313,6 +2314,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -2313,6 +2314,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I8x16MinU) \ V(I8x16MinU) \
V(I8x16MaxU) \ V(I8x16MaxU) \
V(I8x16SConvertI16x8) \ V(I8x16SConvertI16x8) \
V(I8x16UConvertI16x8) \
V(I8x16RoundingAverageU) \ V(I8x16RoundingAverageU) \
V(S128And) \ V(S128And) \
V(S128Or) \ V(S128Or) \
...@@ -2686,26 +2688,6 @@ SIMD_BINOP_RRR(VISIT_SIMD_BINOP_RRR) ...@@ -2686,26 +2688,6 @@ SIMD_BINOP_RRR(VISIT_SIMD_BINOP_RRR)
#undef VISIT_SIMD_BINOP_RRR #undef VISIT_SIMD_BINOP_RRR
#undef SIMD_BINOP_RRR #undef SIMD_BINOP_RRR
// TODO(v8:9198): SSE requires operand1 to be a register as we don't have memory
// alignment yet. For AVX, memory operands are fine, but can have performance
// issues if not aligned to 16/32 bytes (based on load size), see SDM Vol 1,
// chapter 14.9
void VisitPack(InstructionSelector* selector, Node* node, ArchOpcode avx_opcode,
ArchOpcode sse_opcode) {
IA32OperandGenerator g(selector);
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineSameAsFirst(node), operand0, operand1);
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
}
}
void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
VisitPack(this, node, kAVXI16x8UConvertI32x4, kSSEI16x8UConvertI32x4);
}
void InstructionSelector::VisitI16x8BitMask(Node* node) { void InstructionSelector::VisitI16x8BitMask(Node* node) {
IA32OperandGenerator g(this); IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempSimd128Register()}; InstructionOperand temps[] = {g.TempSimd128Register()};
...@@ -2713,10 +2695,6 @@ void InstructionSelector::VisitI16x8BitMask(Node* node) { ...@@ -2713,10 +2695,6 @@ void InstructionSelector::VisitI16x8BitMask(Node* node) {
g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps);
} }
void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
VisitPack(this, node, kAVXI8x16UConvertI16x8, kSSEI8x16UConvertI16x8);
}
void InstructionSelector::VisitI8x16Shl(Node* node) { void InstructionSelector::VisitI8x16Shl(Node* node) {
VisitI8x16Shift(this, node, kIA32I8x16Shl); VisitI8x16Shift(this, node, kIA32I8x16Shl);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment