Commit e1935574 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][x64] Prototype saturating rounding multiply high

Bug: v8:10971
Change-Id: I60186a445f3a5ad366cba4e6bcb16519098aa6ad
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2601009
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71886}
parent 7ddcd92e
...@@ -2032,6 +2032,20 @@ void TurboAssembler::Pshufb(XMMRegister dst, XMMRegister src, ...@@ -2032,6 +2032,20 @@ void TurboAssembler::Pshufb(XMMRegister dst, XMMRegister src,
} }
} }
// Emits a saturating rounding Q15 multiply-high (pmulhrsw) of src1 and src2
// into dst. Uses the three-operand AVX form when available; otherwise falls
// back to the destructive SSSE3 form, moving an operand into dst first.
void TurboAssembler::Pmulhrsw(XMMRegister dst, XMMRegister src1,
                              XMMRegister src2) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vpmulhrsw(dst, src1, src2);
  } else {
    CpuFeatureScope sse_scope(this, SSSE3);
    if (dst == src1) {
      pmulhrsw(dst, src2);
    } else if (dst == src2) {
      // pmulhrsw is commutative, so when dst aliases src2 we can multiply by
      // src1 directly. The Movdqa in the general case below would otherwise
      // overwrite src2 before it is read.
      pmulhrsw(dst, src1);
    } else {
      Movdqa(dst, src1);
      pmulhrsw(dst, src2);
    }
  }
}
void TurboAssembler::I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src) { void TurboAssembler::I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX); CpuFeatureScope avx_scope(this, AVX);
......
...@@ -568,6 +568,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -568,6 +568,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
// Supports both SSE and AVX. Move src1 to dst if they are not equal on SSE. // Supports both SSE and AVX. Move src1 to dst if they are not equal on SSE.
void Pshufb(XMMRegister dst, XMMRegister src1, XMMRegister src2); void Pshufb(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void Pmulhrsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
// These Wasm SIMD ops do not have direct lowerings on x64. These // These Wasm SIMD ops do not have direct lowerings on x64. These
// helpers are optimized to produce the fastest and smallest codegen. // helpers are optimized to produce the fastest and smallest codegen.
......
...@@ -140,7 +140,8 @@ ...@@ -140,7 +140,8 @@
V(pmaddubsw, 66, 0F, 38, 04) \ V(pmaddubsw, 66, 0F, 38, 04) \
V(psignb, 66, 0F, 38, 08) \ V(psignb, 66, 0F, 38, 08) \
V(psignw, 66, 0F, 38, 09) \ V(psignw, 66, 0F, 38, 09) \
V(psignd, 66, 0F, 38, 0A) V(psignd, 66, 0F, 38, 0A) \
V(pmulhrsw, 66, 0F, 38, 0B)
// SSSE3 instructions whose AVX version has two operands. // SSSE3 instructions whose AVX version has two operands.
#define SSSE3_UNOP_INSTRUCTION_LIST(V) \ #define SSSE3_UNOP_INSTRUCTION_LIST(V) \
......
...@@ -2748,10 +2748,12 @@ void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); } ...@@ -2748,10 +2748,12 @@ void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
// && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM // && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_MIPS // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_MIPS
#if !V8_TARGET_ARCH_ARM64 #if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s // TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 || !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM64
// TODO(v8:10972) Prototype i64x2 widen i32x4. // TODO(v8:10972) Prototype i64x2 widen i32x4.
void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) { void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
UNIMPLEMENTED(); UNIMPLEMENTED();
......
...@@ -3330,6 +3330,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3330,6 +3330,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmaddubsw(dst, src, kScratchDoubleReg); __ Pmaddubsw(dst, src, kScratchDoubleReg);
break; break;
} }
case kX64I16x8Q15MulRSatS: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src0 = i.InputSimd128Register(0);
XMMRegister src1 = i.InputSimd128Register(1);
// k = i16x8.splat(0x8000)
// Build the 0x8000 splat from all-ones (pcmpeqd) shifted left by 15.
__ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ Psllw(kScratchDoubleReg, byte{15});
// pmulhrsw computes the rounding Q15 multiply-high, but it does not
// saturate: the single overflowing input pair (-0x8000 * -0x8000)
// yields 0x8000 instead of the saturated 0x7FFF (see Intel SDM,
// PMULHRSW). Fix that lane up below.
__ Pmulhrsw(dst, src0, src1);
// scratch = 0xFFFF in every lane where dst == 0x8000, else 0.
__ Pcmpeqw(kScratchDoubleReg, dst);
// 0x8000 ^ 0xFFFF == 0x7FFF, so overflowed lanes become the saturated
// maximum; all other lanes are xor'ed with 0 and left unchanged.
__ Pxor(dst, kScratchDoubleReg);
break;
}
case kX64I8x16Splat: { case kX64I8x16Splat: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) { if (HasRegisterInput(instr, 0)) {
......
...@@ -292,6 +292,7 @@ namespace compiler { ...@@ -292,6 +292,7 @@ namespace compiler {
V(X64I16x8ExtMulHighI8x16U) \ V(X64I16x8ExtMulHighI8x16U) \
V(X64I16x8ExtAddPairwiseI8x16S) \ V(X64I16x8ExtAddPairwiseI8x16S) \
V(X64I16x8ExtAddPairwiseI8x16U) \ V(X64I16x8ExtAddPairwiseI8x16U) \
V(X64I16x8Q15MulRSatS) \
V(X64I8x16Splat) \ V(X64I8x16Splat) \
V(X64I8x16ExtractLaneS) \ V(X64I8x16ExtractLaneS) \
V(X64Pinsrb) \ V(X64Pinsrb) \
......
...@@ -268,6 +268,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -268,6 +268,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I16x8ExtMulHighI8x16U: case kX64I16x8ExtMulHighI8x16U:
case kX64I16x8ExtAddPairwiseI8x16S: case kX64I16x8ExtAddPairwiseI8x16S:
case kX64I16x8ExtAddPairwiseI8x16U: case kX64I16x8ExtAddPairwiseI8x16U:
case kX64I16x8Q15MulRSatS:
case kX64I8x16Splat: case kX64I8x16Splat:
case kX64I8x16ExtractLaneS: case kX64I8x16ExtractLaneS:
case kX64I8x16SConvertI16x8: case kX64I8x16SConvertI16x8:
......
...@@ -2860,6 +2860,7 @@ VISIT_ATOMIC_BINOP(Xor) ...@@ -2860,6 +2860,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I16x8ExtMulHighI8x16S) \ V(I16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U) \ V(I16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U) \ V(I16x8ExtMulHighI8x16U) \
V(I16x8Q15MulRSatS) \
V(I8x16SConvertI16x8) \ V(I8x16SConvertI16x8) \
V(I8x16UConvertI16x8) \ V(I8x16UConvertI16x8) \
V(I8x16Add) \ V(I8x16Add) \
......
...@@ -2328,14 +2328,14 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) { ...@@ -2328,14 +2328,14 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
base::RoundingAverageUnsigned); base::RoundingAverageUnsigned);
} }
#if V8_TARGET_ARCH_ARM64 #if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s // TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) { WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS, RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS,
SaturateRoundingQMul<int16_t>); SaturateRoundingQMul<int16_t>);
} }
#endif // V8_TARGET_ARCH_ARM64 #endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64
namespace { namespace {
enum class MulHalf { kLow, kHigh }; enum class MulHalf { kLow, kHigh };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment