Commit 5ce3afe2 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Fix F64x2ConvertLowI32x4U isel and codegen

The previous instruction selection was too loose, it only required
registers for the inputs. The codegen also used Unpcklps(dst, mask), and
failed to use src at all. The test case was accidentally passing
because dst == src (xmm0) by chance.

We fix this bug by requiring that, for AVX, any register is fine, but for
SSE, dst == src. Also redefine Unpcklps to check dst == src in
the no-AVX case.

Bug: v8:11265
Change-Id: I1988b2d2da8263512bf6e675e6297c50f55663f7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2668918
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72536}
parent fec9c5d6
......@@ -1825,6 +1825,16 @@ void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
}
}
// Unpacks and interleaves the low single-precision lanes of src1 and src2
// into dst. With AVX this is the non-destructive three-operand vunpcklps;
// without AVX the SSE encoding is destructive, so dst must alias src1.
void TurboAssembler::Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2) {
  if (!CpuFeatures::IsSupported(AVX)) {
    // SSE unpcklps overwrites its first operand; the caller must have
    // arranged for dst == src1 (enforced by instruction selection).
    DCHECK_EQ(dst, src1);
    unpcklps(dst, src2);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vunpcklps(dst, src1, src2);
}
void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
......
......@@ -162,7 +162,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Addss, addss)
AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd)
AVX_OP(Unpcklps, unpcklps)
AVX_OP(Andps, andps)
AVX_OP(Andnps, andnps)
AVX_OP(Andpd, andpd)
......@@ -542,6 +541,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2);
void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2);
// Shufps that will mov src1 into dst if AVX is not supported.
void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8);
......
......@@ -2483,13 +2483,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64F64x2ConvertLowI32x4U: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
// dst = [ src_low, 0x43300000, src_high, 0x4330000 ];
// 0x43300000'00000000 is a special double where the significand bits
// precisely represents all uint32 numbers.
__ Unpcklps(
dst, __ ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
dst, src,
__ ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
__ Subpd(dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
......
......@@ -10,7 +10,9 @@
#include "src/base/platform/wrappers.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/machine-type.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/backend/instruction.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
......@@ -2931,7 +2933,6 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_UNOP_LIST(V) \
V(F64x2Sqrt) \
V(F64x2ConvertLowI32x4S) \
V(F64x2ConvertLowI32x4U) \
V(F64x2PromoteLowF32x4) \
V(F32x4SConvertI32x4) \
V(F32x4Abs) \
......@@ -3726,6 +3727,13 @@ void InstructionSelector::VisitI8x16Popcnt(Node* node) {
arraysize(temps), temps);
}
// Selects the instruction for F64x2ConvertLowI32x4U. The SSE codegen uses
// the destructive unpcklps, which requires the output register to alias the
// first input; with AVX any output register is acceptable.
void InstructionSelector::VisitF64x2ConvertLowI32x4U(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand dst;
  if (IsSupported(AVX)) {
    dst = g.DefineAsRegister(node);
  } else {
    // No AVX: force dst == first input for the destructive SSE encoding.
    dst = g.DefineSameAsFirst(node);
  }
  Emit(kX64F64x2ConvertLowI32x4U, dst, g.UseRegister(node->InputAt(0)));
}
void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
X64OperandGenerator g(this);
if (CpuFeatures::IsSupported(AVX)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment