Commit 148b5391 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Unify sse and avx impl for i16x8 shifts

The implementation is pretty much the same, and we instead delegate to a
macro assembler to decide if we want the sse or avx instruction.

This unification will simplify optimization of constant shifts later on.

Bug: v8:10115
Change-Id: I68e60cb3fd51156438989812be189f71e6e47ba7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2026470
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66121}
parent 1775684e
......@@ -335,10 +335,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Minpd, minpd)
AVX_PACKED_OP3(Maxpd, maxpd)
AVX_PACKED_OP3(Cmpunordpd, cmpunordpd)
AVX_PACKED_OP3(Psllw, psllw)
AVX_PACKED_OP3(Pslld, pslld)
AVX_PACKED_OP3(Psllq, psllq)
AVX_PACKED_OP3(Psrlw, psrlw)
AVX_PACKED_OP3(Psrld, psrld)
AVX_PACKED_OP3(Psrlq, psrlq)
AVX_PACKED_OP3(Psraw, psraw)
AVX_PACKED_OP3(Psrad, psrad)
AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubq, psubq)
......
......@@ -2818,44 +2818,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kSSEI16x8Shl: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ psllw(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI16x8Shl: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI16x8ShrS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
case kIA32I16x8Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ psraw(i.OutputSimd128Register(), tmp);
__ Movd(tmp, shift);
__ Psllw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kAVXI16x8ShrS: {
CpuFeatureScope avx_scope(tasm(), AVX);
case kIA32I16x8ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
__ Movd(tmp, shift);
__ Psraw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI16x8SConvertI32x4: {
......@@ -3022,24 +3000,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmovzxbw(dst, dst);
break;
}
case kSSEI16x8ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ psrlw(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI16x8ShrU: {
CpuFeatureScope avx_scope(tasm(), AVX);
case kIA32I16x8ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ movd(tmp, shift);
__ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
__ Movd(tmp, shift);
__ Psrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI16x8UConvertI32x4: {
......
......@@ -233,10 +233,8 @@ namespace compiler {
V(IA32I16x8SConvertI8x16Low) \
V(IA32I16x8SConvertI8x16High) \
V(IA32I16x8Neg) \
V(SSEI16x8Shl) \
V(AVXI16x8Shl) \
V(SSEI16x8ShrS) \
V(AVXI16x8ShrS) \
V(IA32I16x8Shl) \
V(IA32I16x8ShrS) \
V(SSEI16x8SConvertI32x4) \
V(AVXI16x8SConvertI32x4) \
V(SSEI16x8Add) \
......@@ -265,8 +263,7 @@ namespace compiler {
V(AVXI16x8GeS) \
V(IA32I16x8UConvertI8x16Low) \
V(IA32I16x8UConvertI8x16High) \
V(SSEI16x8ShrU) \
V(AVXI16x8ShrU) \
V(IA32I16x8ShrU) \
V(SSEI16x8UConvertI32x4) \
V(AVXI16x8UConvertI32x4) \
V(SSEI16x8AddSaturateU) \
......
......@@ -214,10 +214,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I16x8SConvertI8x16Low:
case kIA32I16x8SConvertI8x16High:
case kIA32I16x8Neg:
case kSSEI16x8Shl:
case kAVXI16x8Shl:
case kSSEI16x8ShrS:
case kAVXI16x8ShrS:
case kIA32I16x8Shl:
case kIA32I16x8ShrS:
case kSSEI16x8SConvertI32x4:
case kAVXI16x8SConvertI32x4:
case kSSEI16x8Add:
......@@ -246,8 +244,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI16x8GeS:
case kIA32I16x8UConvertI8x16Low:
case kIA32I16x8UConvertI8x16High:
case kSSEI16x8ShrU:
case kAVXI16x8ShrU:
case kIA32I16x8ShrU:
case kSSEI16x8UConvertI32x4:
case kAVXI16x8UConvertI32x4:
case kSSEI16x8AddSaturateU:
......
......@@ -305,18 +305,13 @@ void VisitRRISimd(InstructionSelector* selector, Node* node,
}
void VisitRROSimdShift(InstructionSelector* selector, Node* node,
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
ArchOpcode opcode) {
IA32OperandGenerator g(selector);
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
InstructionOperand temps[] = {g.TempSimd128Register()};
if (selector->IsSupported(AVX)) {
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1,
arraysize(temps), temps);
} else {
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps);
}
selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps);
}
void VisitRROI8x16SimdRightShift(InstructionSelector* selector, Node* node,
......@@ -2130,17 +2125,15 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(S1x8AllTrue) \
V(S1x16AllTrue)
#define SIMD_SHIFT_OPCODES(V) \
V(I16x8Shl) \
V(I16x8ShrS) \
V(I16x8ShrU)
#define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \
V(I64x2Shl) \
V(I64x2ShrU) \
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4ShrU)
V(I32x4ShrU) \
V(I16x8Shl) \
V(I16x8ShrS) \
V(I16x8ShrU)
#define SIMD_I8X16_RIGHT_SHIFT_OPCODES(V) \
V(I8x16ShrS) \
......@@ -2360,17 +2353,9 @@ VISIT_SIMD_REPLACE_LANE(F32x4)
VISIT_SIMD_REPLACE_LANE_USE_REG(F64x2)
#undef VISIT_SIMD_REPLACE_LANE_USE_REG
#define VISIT_SIMD_SHIFT(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROSimdShift(this, node, kAVX##Opcode, kSSE##Opcode); \
}
SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
#undef VISIT_SIMD_SHIFT
#undef SIMD_SHIFT_OPCODES
#define VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROSimdShift(this, node, kIA32##Opcode, kIA32##Opcode); \
#define VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
VisitRROSimdShift(this, node, kIA32##Opcode); \
}
SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX)
#undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment