Commit 3fca8f85 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32] Consolidate some SSE/AVX i8x16 opcodes

Bug: v8:11217
Change-Id: I6e61b11babc0baecf7b1982ef779b941d3344182
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2667971
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72493}
parent d913f046
...@@ -442,6 +442,17 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -442,6 +442,17 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Pavgb, pavgb) AVX_PACKED_OP3(Pavgb, pavgb)
AVX_PACKED_OP3(Pavgw, pavgw) AVX_PACKED_OP3(Pavgw, pavgw)
AVX_PACKED_OP3(Pand, pand) AVX_PACKED_OP3(Pand, pand)
AVX_PACKED_OP3(Pminub, pminub)
AVX_PACKED_OP3(Pmaxub, pmaxub)
AVX_PACKED_OP3(Paddusb, paddusb)
AVX_PACKED_OP3(Psubusb, psubusb)
AVX_PACKED_OP3(Pcmpgtb, pcmpgtb)
AVX_PACKED_OP3(Pcmpeqb, pcmpeqb)
AVX_PACKED_OP3(Paddb, paddb)
AVX_PACKED_OP3(Paddsb, paddsb)
AVX_PACKED_OP3(Psubb, psubb)
AVX_PACKED_OP3(Psubsb, psubsb)
#undef AVX_PACKED_OP3 #undef AVX_PACKED_OP3
AVX_PACKED_OP3_WITH_TYPE(Psllw, psllw, XMMRegister, uint8_t) AVX_PACKED_OP3_WITH_TYPE(Psllw, psllw, XMMRegister, uint8_t)
...@@ -527,6 +538,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -527,6 +538,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd) AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, XMMRegister, SSSE3) AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, XMMRegister, SSSE3)
AVX_OP3_XO_SSE4(Pminsb, pminsb)
AVX_OP3_XO_SSE4(Pmaxsb, pmaxsb)
#undef AVX_OP3_XO_SSE4 #undef AVX_OP3_XO_SSE4
#undef AVX_OP3_WITH_TYPE_SCOPE #undef AVX_OP3_WITH_TYPE_SCOPE
......
...@@ -3409,47 +3409,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3409,47 +3409,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
break; break;
} }
case kSSEI8x16Add: { case kIA32I8x16Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Paddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ paddb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16Add: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpaddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
case kSSEI8x16AddSatS: { case kIA32I8x16AddSatS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Paddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ paddsb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16AddSatS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpaddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
case kSSEI8x16Sub: { case kIA32I8x16Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Psubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ psubb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16Sub: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpsubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
case kSSEI8x16SubSatS: { case kIA32I8x16SubSatS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Psubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ psubsb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16SubSatS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpsubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
...@@ -3532,38 +3508,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3532,38 +3508,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpor(dst, dst, tmp); __ vpor(dst, dst, tmp);
break; break;
} }
case kSSEI8x16MinS: { case kIA32I8x16MinS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Pminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pminsb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16MinS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
case kSSEI8x16MaxS: { case kIA32I8x16MaxS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pmaxsb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16MaxS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
case kSSEI8x16Eq: { case kIA32I8x16Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16Eq: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
...@@ -3583,14 +3539,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3583,14 +3539,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kScratchDoubleReg); kScratchDoubleReg);
break; break;
} }
case kSSEI8x16GtS: { case kIA32I8x16GtS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ pcmpgtb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16GtS: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
...@@ -3625,25 +3575,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3625,25 +3575,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpackuswb(dst, dst, i.InputOperand(1)); __ vpackuswb(dst, dst, i.InputOperand(1));
break; break;
} }
case kSSEI8x16AddSatU: { case kIA32I8x16AddSatU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ paddusb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16AddSatU: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpaddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
case kSSEI8x16SubSatU: { case kIA32I8x16SubSatU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Psubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ psubusb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16SubSatU: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpsubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
...@@ -3679,26 +3617,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3679,26 +3617,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
break; break;
} }
case kSSEI8x16MinU: { case kIA32I8x16MinU: {
XMMRegister dst = i.OutputSimd128Register(); __ Pminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ pminub(dst, i.InputOperand(1));
break;
}
case kAVXI8x16MinU: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
case kSSEI8x16MaxU: { case kIA32I8x16MaxU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ Pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ pmaxub(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI8x16MaxU: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1)); i.InputOperand(1));
break; break;
} }
......
...@@ -313,39 +313,27 @@ namespace compiler { ...@@ -313,39 +313,27 @@ namespace compiler {
V(IA32I8x16Neg) \ V(IA32I8x16Neg) \
V(IA32I8x16Shl) \ V(IA32I8x16Shl) \
V(IA32I8x16ShrS) \ V(IA32I8x16ShrS) \
V(SSEI8x16Add) \ V(IA32I8x16Add) \
V(AVXI8x16Add) \ V(IA32I8x16AddSatS) \
V(SSEI8x16AddSatS) \ V(IA32I8x16Sub) \
V(AVXI8x16AddSatS) \ V(IA32I8x16SubSatS) \
V(SSEI8x16Sub) \
V(AVXI8x16Sub) \
V(SSEI8x16SubSatS) \
V(AVXI8x16SubSatS) \
V(SSEI8x16Mul) \ V(SSEI8x16Mul) \
V(AVXI8x16Mul) \ V(AVXI8x16Mul) \
V(SSEI8x16MinS) \ V(IA32I8x16MinS) \
V(AVXI8x16MinS) \ V(IA32I8x16MaxS) \
V(SSEI8x16MaxS) \ V(IA32I8x16Eq) \
V(AVXI8x16MaxS) \
V(SSEI8x16Eq) \
V(AVXI8x16Eq) \
V(SSEI8x16Ne) \ V(SSEI8x16Ne) \
V(AVXI8x16Ne) \ V(AVXI8x16Ne) \
V(SSEI8x16GtS) \ V(IA32I8x16GtS) \
V(AVXI8x16GtS) \
V(SSEI8x16GeS) \ V(SSEI8x16GeS) \
V(AVXI8x16GeS) \ V(AVXI8x16GeS) \
V(SSEI8x16UConvertI16x8) \ V(SSEI8x16UConvertI16x8) \
V(AVXI8x16UConvertI16x8) \ V(AVXI8x16UConvertI16x8) \
V(SSEI8x16AddSatU) \ V(IA32I8x16AddSatU) \
V(AVXI8x16AddSatU) \ V(IA32I8x16SubSatU) \
V(SSEI8x16SubSatU) \
V(AVXI8x16SubSatU) \
V(IA32I8x16ShrU) \ V(IA32I8x16ShrU) \
V(SSEI8x16MinU) \ V(IA32I8x16MinU) \
V(AVXI8x16MinU) \ V(IA32I8x16MaxU) \
V(SSEI8x16MaxU) \
V(AVXI8x16MaxU) \
V(SSEI8x16GtU) \ V(SSEI8x16GtU) \
V(AVXI8x16GtU) \ V(AVXI8x16GtU) \
V(SSEI8x16GeU) \ V(SSEI8x16GeU) \
......
...@@ -295,39 +295,27 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -295,39 +295,27 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I8x16Neg: case kIA32I8x16Neg:
case kIA32I8x16Shl: case kIA32I8x16Shl:
case kIA32I8x16ShrS: case kIA32I8x16ShrS:
case kSSEI8x16Add: case kIA32I8x16Add:
case kAVXI8x16Add: case kIA32I8x16AddSatS:
case kSSEI8x16AddSatS: case kIA32I8x16Sub:
case kAVXI8x16AddSatS: case kIA32I8x16SubSatS:
case kSSEI8x16Sub:
case kAVXI8x16Sub:
case kSSEI8x16SubSatS:
case kAVXI8x16SubSatS:
case kSSEI8x16Mul: case kSSEI8x16Mul:
case kAVXI8x16Mul: case kAVXI8x16Mul:
case kSSEI8x16MinS: case kIA32I8x16MinS:
case kAVXI8x16MinS: case kIA32I8x16MaxS:
case kSSEI8x16MaxS: case kIA32I8x16Eq:
case kAVXI8x16MaxS:
case kSSEI8x16Eq:
case kAVXI8x16Eq:
case kSSEI8x16Ne: case kSSEI8x16Ne:
case kAVXI8x16Ne: case kAVXI8x16Ne:
case kSSEI8x16GtS: case kIA32I8x16GtS:
case kAVXI8x16GtS:
case kSSEI8x16GeS: case kSSEI8x16GeS:
case kAVXI8x16GeS: case kAVXI8x16GeS:
case kSSEI8x16UConvertI16x8: case kSSEI8x16UConvertI16x8:
case kAVXI8x16UConvertI16x8: case kAVXI8x16UConvertI16x8:
case kSSEI8x16AddSatU: case kIA32I8x16AddSatU:
case kAVXI8x16AddSatU: case kIA32I8x16SubSatU:
case kSSEI8x16SubSatU:
case kAVXI8x16SubSatU:
case kIA32I8x16ShrU: case kIA32I8x16ShrU:
case kSSEI8x16MinU: case kIA32I8x16MinU:
case kAVXI8x16MinU: case kIA32I8x16MaxU:
case kSSEI8x16MaxU:
case kAVXI8x16MaxU:
case kSSEI8x16GtU: case kSSEI8x16GtU:
case kAVXI8x16GtU: case kAVXI8x16GtU:
case kSSEI8x16GeU: case kSSEI8x16GeU:
......
...@@ -2256,20 +2256,8 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -2256,20 +2256,8 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8GtU) \ V(I16x8GtU) \
V(I16x8GeU) \ V(I16x8GeU) \
V(I8x16SConvertI16x8) \ V(I8x16SConvertI16x8) \
V(I8x16Add) \
V(I8x16AddSatS) \
V(I8x16Sub) \
V(I8x16SubSatS) \
V(I8x16MinS) \
V(I8x16MaxS) \
V(I8x16Eq) \
V(I8x16Ne) \ V(I8x16Ne) \
V(I8x16GtS) \
V(I8x16GeS) \ V(I8x16GeS) \
V(I8x16AddSatU) \
V(I8x16SubSatU) \
V(I8x16MinU) \
V(I8x16MaxU) \
V(I8x16GtU) \ V(I8x16GtU) \
V(I8x16GeU) \ V(I8x16GeU) \
V(S128And) \ V(S128And) \
...@@ -2287,6 +2275,18 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -2287,6 +2275,18 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I64x2Eq) \ V(I64x2Eq) \
V(I32x4DotI16x8S) \ V(I32x4DotI16x8S) \
V(I16x8RoundingAverageU) \ V(I16x8RoundingAverageU) \
V(I8x16Add) \
V(I8x16AddSatS) \
V(I8x16Sub) \
V(I8x16SubSatS) \
V(I8x16MinS) \
V(I8x16MaxS) \
V(I8x16Eq) \
V(I8x16GtS) \
V(I8x16AddSatU) \
V(I8x16SubSatU) \
V(I8x16MinU) \
V(I8x16MaxU) \
V(I8x16RoundingAverageU) V(I8x16RoundingAverageU)
// These opcodes require all inputs to be registers because the codegen is // These opcodes require all inputs to be registers because the codegen is
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment