Commit eaf30440 authored by Ng Zhi An, committed by V8 LUCI CQ

[wasm-simd] Share extadd pairwise implementation

Bug: v8:11589
Change-Id: I7c97920d8ab94408b5cde4e90e7ff1aa9bcaeeba
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3119995
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76511}
parent 24af48d6
...@@ -701,95 +701,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src, ...@@ -701,95 +701,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
} }
} }
// Signed extended pairwise addition: each i16 lane of |dst| becomes the sum
// of two adjacent sign-extended i8 lanes of |src|. |tmp| is a clobbered XMM
// temporary; |scratch| is a GP register used to address the splat constant.
void TurboAssembler::I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
                                               XMMRegister tmp,
                                               Register scratch) {
  // pmaddubsw treats the first operand as unsigned, so pass the external
  // reference (a splat of byte 0x01) as the first operand; the signed bytes
  // of |src| in the second operand each get multiplied by 1 and added in
  // pairs, which is exactly the signed pairwise sum.
  Operand op = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i8x16_splat_0x01(), scratch);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vmovdqa(tmp, op);
    vpmaddubsw(dst, tmp, src);
  } else {
    CpuFeatureScope sse_scope(this, SSSE3);
    if (dst == src) {
      // dst aliases src: accumulate into tmp first so the input is not
      // clobbered before pmaddubsw reads it.
      movaps(tmp, op);
      pmaddubsw(tmp, src);
      movaps(dst, tmp);
    } else {
      movaps(dst, op);
      pmaddubsw(dst, src);
    }
  }
}
// Unsigned extended pairwise addition: each i16 lane of |dst| becomes the
// sum of two adjacent zero-extended i8 lanes of |src|. |scratch| is a GP
// register used to address the splat constant.
void TurboAssembler::I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
                                               Register scratch) {
  // pmaddubsw treats its first operand as unsigned; multiplying every byte
  // of |src| by 1 (from the 0x01 splat) and adding adjacent products yields
  // the unsigned pairwise sum.
  Operand splat_0x01 = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i8x16_splat_0x01(), scratch);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vpmaddubsw(dst, src, splat_0x01);
  } else {
    CpuFeatureScope sse_scope(this, SSSE3);
    movaps(dst, src);
    pmaddubsw(dst, splat_0x01);
  }
}
// Signed extended pairwise addition of i16x8 lanes into i32x4 lanes.
// |scratch| is a GP register used to address the splat constant.
void TurboAssembler::I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
                                               Register scratch) {
  // pmaddwd multiplies the signed words of its two operands and adds the
  // products pairwise into signed doublewords. With a splat of 0x0001:
  //   src = |a|b|c|d|e|f|g|h|
  //   dst = |a*1 + b*1|c*1 + d*1|e*1 + f*1|g*1 + h*1|
  Operand splat_0x0001 = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i16x8_splat_0x0001(), scratch);
  Pmaddwd(dst, src, splat_0x0001);
}
// Unsigned extended pairwise addition: each i32 lane of |dst| becomes the
// sum of two adjacent zero-extended i16 lanes of |src|. |tmp| is a clobbered
// XMM temporary (assumed distinct from |src| — confirm at call sites).
void TurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
                                               XMMRegister tmp) {
  ASM_CODE_COMMENT(this);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    // src = |a|b|c|d|e|f|g|h| (low)
    // tmp = |0|a|0|c|0|e|0|g|
    vpsrld(tmp, src, 16);
    // dst = |0|b|0|d|0|f|0|h|
    vpblendw(dst, src, tmp, 0xAA);
    // dst = |a+b|c+d|e+f|g+h|
    vpaddd(dst, tmp, dst);
  } else if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    // There is a potentially better lowering if we get rip-relative constants,
    // see https://github.com/WebAssembly/simd/pull/380.
    movaps(tmp, src);
    psrld(tmp, 16);
    if (dst != src) {
      movaps(dst, src);
    }
    pblendw(dst, tmp, 0xAA);
    paddd(dst, tmp);
  } else {
    // SSE2 fallback: mask out alternating halves instead of blending.
    // src = |a|b|c|d|e|f|g|h|
    // tmp = i32x4.splat(0x0000FFFF)
    pcmpeqd(tmp, tmp);
    psrld(tmp, byte{16});
    // tmp = |0|b|0|d|0|f|0|h|
    andps(tmp, src);
    // dst = |0|a|0|c|0|e|0|g|
    if (dst != src) {
      movaps(dst, src);
    }
    psrld(dst, byte{16});
    // dst = |a+b|c+d|e+f|g+h|
    paddd(dst, tmp);
  }
}
void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src, void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src,
XMMRegister mask, XMMRegister scratch, XMMRegister mask, XMMRegister scratch,
Register tmp, bool omit_add) { Register tmp, bool omit_add) {
......
...@@ -326,7 +326,6 @@ class V8_EXPORT_PRIVATE TurboAssembler ...@@ -326,7 +326,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
AVX_OP3_WITH_MOVE(Cmpeqps, cmpeqps, XMMRegister, Operand) AVX_OP3_WITH_MOVE(Cmpeqps, cmpeqps, XMMRegister, Operand)
AVX_OP3_WITH_MOVE(Movlps, movlps, XMMRegister, Operand) AVX_OP3_WITH_MOVE(Movlps, movlps, XMMRegister, Operand)
AVX_OP3_WITH_MOVE(Movhps, movhps, XMMRegister, Operand) AVX_OP3_WITH_MOVE(Movhps, movhps, XMMRegister, Operand)
AVX_OP3_WITH_MOVE(Pmaddwd, pmaddwd, XMMRegister, Operand)
#undef AVX_OP3_WITH_MOVE #undef AVX_OP3_WITH_MOVE
// TODO(zhin): Remove after moving more definitions into SharedTurboAssembler. // TODO(zhin): Remove after moving more definitions into SharedTurboAssembler.
...@@ -397,14 +396,6 @@ class V8_EXPORT_PRIVATE TurboAssembler ...@@ -397,14 +396,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
// Defined here to allow usage on both TurboFan and Liftoff. // Defined here to allow usage on both TurboFan and Liftoff.
void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp1, void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp1,
XMMRegister tmp2, Register scratch); XMMRegister tmp2, Register scratch);
void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
XMMRegister tmp, Register scratch);
void I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
Register scratch);
void I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
Register scratch);
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
XMMRegister tmp);
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask, void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask,
XMMRegister scratch, Register tmp, bool omit_add = false); XMMRegister scratch, Register tmp, bool omit_add = false);
......
...@@ -588,6 +588,47 @@ void SharedTurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, ...@@ -588,6 +588,47 @@ void SharedTurboAssembler::I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1,
Pxor(dst, scratch); Pxor(dst, scratch);
} }
// Unsigned extended pairwise addition, shared between ia32 and x64: each i32
// lane of |dst| becomes the sum of two adjacent zero-extended i16 lanes of
// |src|. |tmp| is a clobbered XMM temporary (assumed distinct from |src| —
// confirm at call sites).
void SharedTurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
                                                     XMMRegister src,
                                                     XMMRegister tmp) {
  ASM_CODE_COMMENT(this);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    // src = |a|b|c|d|e|f|g|h| (low)
    // tmp = |0|a|0|c|0|e|0|g|
    vpsrld(tmp, src, 16);
    // dst = |0|b|0|d|0|f|0|h|
    vpblendw(dst, src, tmp, 0xAA);
    // dst = |a+b|c+d|e+f|g+h|
    vpaddd(dst, tmp, dst);
  } else if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    // There is a potentially better lowering if we get rip-relative
    // constants, see https://github.com/WebAssembly/simd/pull/380.
    movaps(tmp, src);
    psrld(tmp, 16);
    if (dst != src) {
      movaps(dst, src);
    }
    pblendw(dst, tmp, 0xAA);
    paddd(dst, tmp);
  } else {
    // SSE2 fallback: mask out alternating halves instead of blending.
    // src = |a|b|c|d|e|f|g|h|
    // tmp = i32x4.splat(0x0000FFFF)
    pcmpeqd(tmp, tmp);
    psrld(tmp, byte{16});
    // tmp = |0|b|0|d|0|f|0|h|
    andps(tmp, src);
    // dst = |0|a|0|c|0|e|0|g|
    if (dst != src) {
      movaps(dst, src);
    }
    psrld(dst, byte{16});
    // dst = |a+b|c+d|e+f|g+h|
    paddd(dst, tmp);
  }
}
// 1. Multiply low word into scratch. // 1. Multiply low word into scratch.
// 2. Multiply high word (can be signed or unsigned) into dst. // 2. Multiply high word (can be signed or unsigned) into dst.
// 3. Unpack and interleave scratch and dst into dst. // 3. Unpack and interleave scratch and dst into dst.
......
...@@ -235,6 +235,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase { ...@@ -235,6 +235,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP(Pcmpeqd, pcmpeqd) AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP(Pcmpeqw, pcmpeqw) AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pinsrw, pinsrw) AVX_OP(Pinsrw, pinsrw)
AVX_OP(Pmaddwd, pmaddwd)
AVX_OP(Pmaxsw, pmaxsw) AVX_OP(Pmaxsw, pmaxsw)
AVX_OP(Pmaxub, pmaxub) AVX_OP(Pmaxub, pmaxub)
AVX_OP(Pminsw, pminsw) AVX_OP(Pminsw, pminsw)
...@@ -361,6 +362,8 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase { ...@@ -361,6 +362,8 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
// Will move src1 to dst if AVX is not supported. // Will move src1 to dst if AVX is not supported.
void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2, void I16x8Q15MulRSatS(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch); XMMRegister scratch);
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
XMMRegister tmp);
// Requires that dst == src1 if AVX is not supported. // Requires that dst == src1 if AVX is not supported.
void I32x4ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2, void I32x4ExtMul(XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister scratch, bool low, bool is_signed); XMMRegister scratch, bool low, bool is_signed);
...@@ -515,6 +518,63 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler { ...@@ -515,6 +518,63 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
} }
} }
void I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
Register scratch) {
Operand op = ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i16x8_splat_0x0001(), scratch);
// pmaddwd multiplies signed words in src and op, producing
// signed doublewords, then adds pairwise.
// src = |a|b|c|d|e|f|g|h|
// dst = | a*1 + b*1 | c*1 + d*1 | e*1 + f*1 | g*1 + h*1 |
if (!CpuFeatures::IsSupported(AVX) && (dst != src)) {
movaps(dst, src);
src = dst;
}
Pmaddwd(dst, src, op);
}
  // Signed extended pairwise addition: each i16 lane of |dst| becomes the
  // sum of two adjacent sign-extended i8 lanes of |src|. |scratch| is a
  // clobbered XMM temporary; |tmp| is a GP register used to address the
  // splat constant.
  void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src,
                                 XMMRegister scratch, Register tmp) {
    ASM_CODE_COMMENT(this);
    // pmaddubsw treats the first operand as unsigned, so pass the external
    // reference (a splat of byte 0x01) to it as the first operand; the
    // signed bytes of |src| each get multiplied by 1 and added in pairs.
    Operand op = ExternalReferenceAsOperand(
        ExternalReference::address_of_wasm_i8x16_splat_0x01(), tmp);
    if (CpuFeatures::IsSupported(AVX)) {
      CpuFeatureScope avx_scope(this, AVX);
      vmovdqa(scratch, op);
      vpmaddubsw(dst, scratch, src);
    } else {
      CpuFeatureScope sse_scope(this, SSSE3);
      if (dst == src) {
        // dst aliases src: accumulate into scratch first so the input is
        // not clobbered before pmaddubsw reads it.
        movaps(scratch, op);
        pmaddubsw(scratch, src);
        movaps(dst, scratch);
      } else {
        movaps(dst, op);
        pmaddubsw(dst, src);
      }
    }
  }
void I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
Register scratch) {
ASM_CODE_COMMENT(this);
Operand op = ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i8x16_splat_0x01(), scratch);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpmaddubsw(dst, src, op);
} else {
CpuFeatureScope sse_scope(this, SSSE3);
if (dst != src) {
movaps(dst, src);
}
pmaddubsw(dst, op);
}
}
private: private:
// All implementation-specific methods must be called through this. // All implementation-specific methods must be called through this.
Impl* impl() { return static_cast<Impl*>(this); } Impl* impl() { return static_cast<Impl*>(this); }
......
...@@ -2004,31 +2004,6 @@ void TurboAssembler::JumpCodeTObject(Register code, JumpMode jump_mode) { ...@@ -2004,31 +2004,6 @@ void TurboAssembler::JumpCodeTObject(Register code, JumpMode jump_mode) {
} }
} }
// Macro-assembler pmaddwd with a memory second source: multiply packed
// signed words and add adjacent products into doublewords. Without AVX,
// src1 is first moved into dst when they differ (the SSE instruction
// operates in place on dst).
void TurboAssembler::Pmaddwd(XMMRegister dst, XMMRegister src1, Operand src2) {
  if (!CpuFeatures::IsSupported(AVX)) {
    if (dst != src1) {
      movaps(dst, src1);
    }
    pmaddwd(dst, src2);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpmaddwd(dst, src1, src2);
}
// Register-register overload of Pmaddwd; see the Operand overload above for
// semantics. Without AVX, src1 is first moved into dst when they differ.
void TurboAssembler::Pmaddwd(XMMRegister dst, XMMRegister src1,
                             XMMRegister src2) {
  if (!CpuFeatures::IsSupported(AVX)) {
    if (dst != src1) {
      movaps(dst, src1);
    }
    pmaddwd(dst, src2);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpmaddwd(dst, src1, src2);
}
void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1, void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
Operand src2) { Operand src2) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
...@@ -2302,68 +2277,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src, ...@@ -2302,68 +2277,6 @@ void TurboAssembler::I8x16Popcnt(XMMRegister dst, XMMRegister src,
} }
} }
// Signed extended pairwise addition (x64): each i16 lane of |dst| becomes
// the sum of two adjacent sign-extended i8 lanes of |src|. Clobbers
// kScratchDoubleReg when dst aliases src.
void TurboAssembler::I16x8ExtAddPairwiseI8x16S(XMMRegister dst,
                                               XMMRegister src) {
  // pmaddubsw treats the first operand as unsigned, so the external
  // reference (a splat of byte 0x01) has to be passed to it as the first
  // operand; the signed bytes of |src| each get multiplied by 1 and added
  // in pairs.
  Operand op = ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i8x16_splat_0x01());
  if (dst == src) {
    // dst aliases src: accumulate into the scratch register first so the
    // input is not clobbered before pmaddubsw reads it.
    if (CpuFeatures::IsSupported(AVX)) {
      CpuFeatureScope avx_scope(this, AVX);
      vmovdqa(kScratchDoubleReg, op);
      vpmaddubsw(dst, kScratchDoubleReg, src);
    } else {
      CpuFeatureScope sse_scope(this, SSSE3);
      movaps(kScratchDoubleReg, op);
      pmaddubsw(kScratchDoubleReg, src);
      movaps(dst, kScratchDoubleReg);
    }
  } else {
    Movdqa(dst, op);
    Pmaddubsw(dst, dst, src);
  }
}
// Unsigned extended pairwise addition (x64): each i32 lane of |dst| becomes
// the sum of two adjacent zero-extended i16 lanes of |src|. Clobbers
// kScratchDoubleReg.
void TurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
                                               XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    // src = |a|b|c|d|e|f|g|h| (low)
    // kScratchDoubleReg = |0|a|0|c|0|e|0|g|
    vpsrld(kScratchDoubleReg, src, 16);
    // dst = |0|b|0|d|0|f|0|h|
    vpblendw(dst, src, kScratchDoubleReg, 0xAA);
    // dst = |a+b|c+d|e+f|g+h|
    vpaddd(dst, kScratchDoubleReg, dst);
  } else if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    // There is a potentially better lowering if we get rip-relative constants,
    // see https://github.com/WebAssembly/simd/pull/380.
    movaps(kScratchDoubleReg, src);
    psrld(kScratchDoubleReg, 16);
    if (dst != src) {
      movaps(dst, src);
    }
    pblendw(dst, kScratchDoubleReg, 0xAA);
    paddd(dst, kScratchDoubleReg);
  } else {
    // SSE2 fallback: mask out alternating halves instead of blending.
    // src = |a|b|c|d|e|f|g|h|
    // kScratchDoubleReg = i32x4.splat(0x0000FFFF)
    pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    psrld(kScratchDoubleReg, byte{16});
    // kScratchDoubleReg = |0|b|0|d|0|f|0|h|
    andps(kScratchDoubleReg, src);
    // dst = |0|a|0|c|0|e|0|g|
    if (dst != src) {
      movaps(dst, src);
    }
    psrld(dst, byte{16});
    // dst = |a+b|c+d|e+f|g+h|
    paddd(dst, kScratchDoubleReg);
  }
}
void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src, void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src,
XMMRegister mask, bool omit_add) { XMMRegister mask, bool omit_add) {
if (omit_add) { if (omit_add) {
......
...@@ -447,8 +447,6 @@ class V8_EXPORT_PRIVATE TurboAssembler ...@@ -447,8 +447,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
void DebugBreak(); void DebugBreak();
// Will move src1 to dst if dst != src1. // Will move src1 to dst if dst != src1.
void Pmaddwd(XMMRegister dst, XMMRegister src1, Operand src2);
void Pmaddwd(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2); void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2);
void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2); void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
...@@ -481,9 +479,6 @@ class V8_EXPORT_PRIVATE TurboAssembler ...@@ -481,9 +479,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
// Defined here to allow usage on both TurboFan and Liftoff. // Defined here to allow usage on both TurboFan and Liftoff.
void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp); void I8x16Popcnt(XMMRegister dst, XMMRegister src, XMMRegister tmp);
void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src);
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src);
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask, void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask,
bool omit_add = false); bool omit_add = false);
......
...@@ -3167,21 +3167,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3167,21 +3167,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I32x4ExtAddPairwiseI16x8S: { case kX64I32x4ExtAddPairwiseI16x8S: {
XMMRegister dst = i.OutputSimd128Register(); __ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
XMMRegister src1 = i.InputSimd128Register(0); i.InputSimd128Register(0), kScratchRegister);
// pmaddwd multiplies signed words in src1 and src2, producing signed
// doublewords, then adds pairwise.
// src1 = |a|b|c|d|e|f|g|h|
// src2 = |1|1|1|1|1|1|1|1|
// dst = | a*1 + b*1 | c*1 + d*1 | e*1 + f*1 | g*1 + h*1 |
Operand src2 = __ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i16x8_splat_0x0001());
__ Pmaddwd(dst, src1, src2);
break; break;
} }
case kX64I32x4ExtAddPairwiseI16x8U: { case kX64I32x4ExtAddPairwiseI16x8U: {
__ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(), __ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
i.InputSimd128Register(0)); i.InputSimd128Register(0),
kScratchDoubleReg);
break; break;
} }
case kX64S128Const: { case kX64S128Const: {
...@@ -3394,15 +3387,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3394,15 +3387,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64I16x8ExtAddPairwiseI8x16S: { case kX64I16x8ExtAddPairwiseI8x16S: {
__ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(), __ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
i.InputSimd128Register(0)); i.InputSimd128Register(0), kScratchDoubleReg,
kScratchRegister);
break; break;
} }
case kX64I16x8ExtAddPairwiseI8x16U: { case kX64I16x8ExtAddPairwiseI8x16U: {
XMMRegister dst = i.OutputSimd128Register(); __ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
XMMRegister src1 = i.InputSimd128Register(0); i.InputSimd128Register(0), kScratchRegister);
Operand src2 = __ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i8x16_splat_0x01());
__ Pmaddubsw(dst, src1, src2);
break; break;
} }
case kX64I16x8Q15MulRSatS: { case kX64I16x8Q15MulRSatS: {
......
...@@ -3131,14 +3131,13 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst, ...@@ -3131,14 +3131,13 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst, void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp()); I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), kScratchDoubleReg,
kScratchRegister);
} }
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst, void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Operand op = ExternalReferenceAsOperand( I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(), kScratchRegister);
ExternalReference::address_of_wasm_i8x16_splat_0x01());
Pmaddubsw(dst.fp(), src.fp(), op);
} }
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst, void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
...@@ -3287,14 +3286,12 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst, ...@@ -3287,14 +3286,12 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Operand op = ExternalReferenceAsOperand( I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(), kScratchRegister);
ExternalReference::address_of_wasm_i16x8_splat_0x0001());
Pmaddwd(dst.fp(), src.fp(), op);
} }
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp()); I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), kScratchDoubleReg);
} }
namespace liftoff { namespace liftoff {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment