Commit 90830b59, authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Unify sse and avx impl for i32x4 shifts

The implementation is pretty much the same, and we instead delegate to a
macro assembler to decide if we want the sse or avx instruction.

This unification will simplify optimization of constant shifts later on.

Bug: v8:10115
Change-Id: If9a17519a746f0a8474e75dbdebb8e4f5b0d07c4
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2026469
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66087}
parent e8bba383
......@@ -335,8 +335,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Minpd, minpd)
AVX_PACKED_OP3(Maxpd, maxpd)
AVX_PACKED_OP3(Cmpunordpd, cmpunordpd)
AVX_PACKED_OP3(Pslld, pslld)
AVX_PACKED_OP3(Psllq, psllq)
AVX_PACKED_OP3(Psrld, psrld)
AVX_PACKED_OP3(Psrlq, psrlq)
AVX_PACKED_OP3(Psrad, psrad)
AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubq, psubq)
AVX_PACKED_OP3(Pmuludq, pmuludq)
......
......@@ -2487,44 +2487,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kSSEI32x4Shl: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ pslld(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4Shl: {
CpuFeatureScope avx_scope(tasm(), AVX);
case kIA32I32x4Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI32x4ShrS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ psrad(i.OutputSimd128Register(), tmp);
__ Movd(tmp, shift);
__ Pslld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kAVXI32x4ShrS: {
CpuFeatureScope avx_scope(tasm(), AVX);
case kIA32I32x4ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
__ Movd(tmp, shift);
__ Psrad(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI32x4Add: {
......@@ -2717,24 +2695,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmovzxwd(dst, dst);
break;
}
case kSSEI32x4ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ psrld(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4ShrU: {
CpuFeatureScope avx_scope(tasm(), AVX);
case kIA32I32x4ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
__ Psrld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI32x4MinU: {
......
......@@ -190,10 +190,8 @@ namespace compiler {
V(IA32I32x4SConvertI16x8Low) \
V(IA32I32x4SConvertI16x8High) \
V(IA32I32x4Neg) \
V(SSEI32x4Shl) \
V(AVXI32x4Shl) \
V(SSEI32x4ShrS) \
V(AVXI32x4ShrS) \
V(IA32I32x4Shl) \
V(IA32I32x4ShrS) \
V(SSEI32x4Add) \
V(AVXI32x4Add) \
V(SSEI32x4AddHoriz) \
......@@ -218,8 +216,7 @@ namespace compiler {
V(AVXI32x4UConvertF32x4) \
V(IA32I32x4UConvertI16x8Low) \
V(IA32I32x4UConvertI16x8High) \
V(SSEI32x4ShrU) \
V(AVXI32x4ShrU) \
V(IA32I32x4ShrU) \
V(SSEI32x4MinU) \
V(AVXI32x4MinU) \
V(SSEI32x4MaxU) \
......
......@@ -171,10 +171,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I32x4SConvertI16x8Low:
case kIA32I32x4SConvertI16x8High:
case kIA32I32x4Neg:
case kSSEI32x4Shl:
case kAVXI32x4Shl:
case kSSEI32x4ShrS:
case kAVXI32x4ShrS:
case kIA32I32x4Shl:
case kIA32I32x4ShrS:
case kSSEI32x4Add:
case kAVXI32x4Add:
case kSSEI32x4AddHoriz:
......@@ -199,8 +197,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4UConvertF32x4:
case kIA32I32x4UConvertI16x8Low:
case kIA32I32x4UConvertI16x8High:
case kSSEI32x4ShrU:
case kAVXI32x4ShrU:
case kIA32I32x4ShrU:
case kSSEI32x4MinU:
case kAVXI32x4MinU:
case kSSEI32x4MaxU:
......
......@@ -2131,16 +2131,16 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(S1x16AllTrue)
#define SIMD_SHIFT_OPCODES(V) \
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4ShrU) \
V(I16x8Shl) \
V(I16x8ShrS) \
V(I16x8ShrU)
#define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \
V(I64x2Shl) \
V(I64x2ShrU)
V(I64x2ShrU) \
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4ShrU)
#define SIMD_I8X16_RIGHT_SHIFT_OPCODES(V) \
V(I8x16ShrS) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment