Commit 39f9936c authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32] Fix ext add codegen

The main problem here is that the macro-assembler for Pmaddubsw and
Pmaddwd expects dst == src1 when AVX is not supported.

For Pmaddwd, we use an existing macro to define the function.

For Pmaddubsw, we do the AVX check inline and, when AVX is not
supported, emit movaps followed by pmaddubsw. This is done inline
because pmaddubsw requires an SSSE3 scope, and we don't have an
existing macro that handles this (we can find other uses and clean
up as necessary in the future).

Bug: v8:11086
Change-Id: I97bd29cd93456744414d28e5f1ffcbc875c3ab22
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2716740
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73111}
parent fcf29e48
......@@ -1160,19 +1160,20 @@ void TurboAssembler::I16x8ExtAddPairwiseI8x16U(XMMRegister dst, XMMRegister src,
Register scratch) {
Operand op = ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i8x16_splat_0x01(), scratch);
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vpmaddubsw(dst, src, op);
} else {
CpuFeatureScope sse_scope(this, SSSE3);
movaps(dst, src);
pmaddubsw(dst, op);
}
Pmaddubsw(dst, src, op);
}
void TurboAssembler::I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
Register scratch) {
Operand op = ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_i16x8_splat_0x0001(), scratch);
if (!CpuFeatures::IsSupported(AVX) && dst != src) {
movaps(dst, src);
}
// pmaddwd multiplies signed words in src and op, producing
// signed doublewords, then adds pairwise.
// src = |a|b|c|d|e|f|g|h|
......
......@@ -442,7 +442,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Psrlq, psrlq)
AVX_PACKED_OP3(Psraw, psraw)
AVX_PACKED_OP3(Psrad, psrad)
AVX_PACKED_OP3(Pmaddwd, pmaddwd)
AVX_PACKED_OP3(Paddd, paddd)
AVX_PACKED_OP3(Paddq, paddq)
AVX_PACKED_OP3(Psubd, psubd)
......@@ -491,6 +490,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
}
AVX_OP3_WITH_MOVE(Movlps, movlps, XMMRegister, Operand)
AVX_OP3_WITH_MOVE(Movhps, movhps, XMMRegister, Operand)
AVX_OP3_WITH_MOVE(Pmaddwd, pmaddwd, XMMRegister, Operand)
#undef AVX_OP3_WITH_MOVE
// Non-SSE2 instructions.
......@@ -564,8 +564,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
AVX_OP3_XO_SSE4(Pminsb, pminsb)
AVX_OP3_XO_SSE4(Pmaxsb, pmaxsb)
AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, XMMRegister, SSSE3)
AVX_OP3_WITH_TYPE_SCOPE(Pmaddubsw, pmaddubsw, XMMRegister, Operand, SSSE3)
#undef AVX_OP3_XO_SSE4
#undef AVX_OP3_WITH_TYPE_SCOPE
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment