Commit 90830b59 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Unify sse and avx impl for i32x4 shifts

The implementation is pretty much the same, and we instead delegate to a
macro assembler to decide if we want the sse or avx instruction.

This unification will simplify optimization of constant shifts later on.

Bug: v8:10115
Change-Id: If9a17519a746f0a8474e75dbdebb8e4f5b0d07c4
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2026469
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66087}
parent e8bba383
...@@ -335,8 +335,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -335,8 +335,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Minpd, minpd) AVX_PACKED_OP3(Minpd, minpd)
AVX_PACKED_OP3(Maxpd, maxpd) AVX_PACKED_OP3(Maxpd, maxpd)
AVX_PACKED_OP3(Cmpunordpd, cmpunordpd) AVX_PACKED_OP3(Cmpunordpd, cmpunordpd)
AVX_PACKED_OP3(Pslld, pslld)
AVX_PACKED_OP3(Psllq, psllq) AVX_PACKED_OP3(Psllq, psllq)
AVX_PACKED_OP3(Psrld, psrld)
AVX_PACKED_OP3(Psrlq, psrlq) AVX_PACKED_OP3(Psrlq, psrlq)
AVX_PACKED_OP3(Psrad, psrad)
AVX_PACKED_OP3(Paddq, paddq) AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubq, psubq) AVX_PACKED_OP3(Psubq, psubq)
AVX_PACKED_OP3(Pmuludq, pmuludq) AVX_PACKED_OP3(Pmuludq, pmuludq)
......
...@@ -2487,44 +2487,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2487,44 +2487,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
break; break;
} }
case kSSEI32x4Shl: { case kIA32I32x4Shl: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ pslld(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4Shl: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0); XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1); Register shift = i.InputRegister(1);
// Take shift value modulo 32. // Take shift value modulo 32.
__ and_(shift, 31); __ and_(shift, 31);
__ movd(tmp, shift); __ Movd(tmp, shift);
__ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); __ Pslld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break;
}
case kSSEI32x4ShrS: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ psrad(i.OutputSimd128Register(), tmp);
break; break;
} }
case kAVXI32x4ShrS: { case kIA32I32x4ShrS: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0); XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1); Register shift = i.InputRegister(1);
// Take shift value modulo 32. // Take shift value modulo 32.
__ and_(shift, 31); __ and_(shift, 31);
__ movd(tmp, shift); __ Movd(tmp, shift);
__ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); __ Psrad(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break; break;
} }
case kSSEI32x4Add: { case kSSEI32x4Add: {
...@@ -2717,24 +2695,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2717,24 +2695,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmovzxwd(dst, dst); __ Pmovzxwd(dst, dst);
break; break;
} }
case kSSEI32x4ShrU: { case kIA32I32x4ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ psrld(i.OutputSimd128Register(), tmp);
break;
}
case kAVXI32x4ShrU: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister tmp = i.TempSimd128Register(0); XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1); Register shift = i.InputRegister(1);
// Take shift value modulo 32. // Take shift value modulo 32.
__ and_(shift, 31); __ and_(shift, 31);
__ movd(tmp, shift); __ movd(tmp, shift);
__ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp); __ Psrld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
break; break;
} }
case kSSEI32x4MinU: { case kSSEI32x4MinU: {
......
...@@ -190,10 +190,8 @@ namespace compiler { ...@@ -190,10 +190,8 @@ namespace compiler {
V(IA32I32x4SConvertI16x8Low) \ V(IA32I32x4SConvertI16x8Low) \
V(IA32I32x4SConvertI16x8High) \ V(IA32I32x4SConvertI16x8High) \
V(IA32I32x4Neg) \ V(IA32I32x4Neg) \
V(SSEI32x4Shl) \ V(IA32I32x4Shl) \
V(AVXI32x4Shl) \ V(IA32I32x4ShrS) \
V(SSEI32x4ShrS) \
V(AVXI32x4ShrS) \
V(SSEI32x4Add) \ V(SSEI32x4Add) \
V(AVXI32x4Add) \ V(AVXI32x4Add) \
V(SSEI32x4AddHoriz) \ V(SSEI32x4AddHoriz) \
...@@ -218,8 +216,7 @@ namespace compiler { ...@@ -218,8 +216,7 @@ namespace compiler {
V(AVXI32x4UConvertF32x4) \ V(AVXI32x4UConvertF32x4) \
V(IA32I32x4UConvertI16x8Low) \ V(IA32I32x4UConvertI16x8Low) \
V(IA32I32x4UConvertI16x8High) \ V(IA32I32x4UConvertI16x8High) \
V(SSEI32x4ShrU) \ V(IA32I32x4ShrU) \
V(AVXI32x4ShrU) \
V(SSEI32x4MinU) \ V(SSEI32x4MinU) \
V(AVXI32x4MinU) \ V(AVXI32x4MinU) \
V(SSEI32x4MaxU) \ V(SSEI32x4MaxU) \
......
...@@ -171,10 +171,8 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -171,10 +171,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I32x4SConvertI16x8Low: case kIA32I32x4SConvertI16x8Low:
case kIA32I32x4SConvertI16x8High: case kIA32I32x4SConvertI16x8High:
case kIA32I32x4Neg: case kIA32I32x4Neg:
case kSSEI32x4Shl: case kIA32I32x4Shl:
case kAVXI32x4Shl: case kIA32I32x4ShrS:
case kSSEI32x4ShrS:
case kAVXI32x4ShrS:
case kSSEI32x4Add: case kSSEI32x4Add:
case kAVXI32x4Add: case kAVXI32x4Add:
case kSSEI32x4AddHoriz: case kSSEI32x4AddHoriz:
...@@ -199,8 +197,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -199,8 +197,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4UConvertF32x4: case kAVXI32x4UConvertF32x4:
case kIA32I32x4UConvertI16x8Low: case kIA32I32x4UConvertI16x8Low:
case kIA32I32x4UConvertI16x8High: case kIA32I32x4UConvertI16x8High:
case kSSEI32x4ShrU: case kIA32I32x4ShrU:
case kAVXI32x4ShrU:
case kSSEI32x4MinU: case kSSEI32x4MinU:
case kAVXI32x4MinU: case kAVXI32x4MinU:
case kSSEI32x4MaxU: case kSSEI32x4MaxU:
......
...@@ -2131,16 +2131,16 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -2131,16 +2131,16 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(S1x16AllTrue) V(S1x16AllTrue)
#define SIMD_SHIFT_OPCODES(V) \ #define SIMD_SHIFT_OPCODES(V) \
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4ShrU) \
V(I16x8Shl) \ V(I16x8Shl) \
V(I16x8ShrS) \ V(I16x8ShrS) \
V(I16x8ShrU) V(I16x8ShrU)
#define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \ #define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \
V(I64x2Shl) \ V(I64x2Shl) \
V(I64x2ShrU) V(I64x2ShrU) \
V(I32x4Shl) \
V(I32x4ShrS) \
V(I32x4ShrU)
#define SIMD_I8X16_RIGHT_SHIFT_OPCODES(V) \ #define SIMD_I8X16_RIGHT_SHIFT_OPCODES(V) \
V(I8x16ShrS) \ V(I8x16ShrS) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment