Commit 5ce3afe2 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Fix F64x2ConvertLowI32x4U isel and codegen

The previous instruction selection was too loose, it only required
registers for the inputs. The codegen also used Unpcklps(dst, mask), and
failed to use src at all. The test case was accidentally passing
because dst == src (xmm0) by chance.

We fix this bug by requiring that, for AVX, any register is fine, but for
SSE, dst == src. Also redefine Unpcklps to check dst == src in
the no-AVX case.

Bug: v8:11265
Change-Id: I1988b2d2da8263512bf6e675e6297c50f55663f7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2668918
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72536}
parent fec9c5d6
......@@ -1825,6 +1825,16 @@ void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
}
}
// Unpacks and interleaves the low single-precision lanes of src1 and src2
// into dst. With AVX this is the non-destructive three-operand vunpcklps;
// without AVX the SSE encoding is destructive, so dst must alias src1.
void TurboAssembler::Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2) {
  if (!CpuFeatures::IsSupported(AVX)) {
    // SSE unpcklps overwrites its first operand; the caller must have
    // arranged for dst == src1 (enforced by instruction selection).
    DCHECK_EQ(dst, src1);
    unpcklps(dst, src2);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vunpcklps(dst, src1, src2);
}
void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8) {
if (CpuFeatures::IsSupported(AVX)) {
......
......@@ -162,7 +162,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Addss, addss)
AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd)
AVX_OP(Unpcklps, unpcklps)
AVX_OP(Andps, andps)
AVX_OP(Andnps, andnps)
AVX_OP(Andpd, andpd)
......@@ -542,6 +541,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2);
void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2);
// Shufps that will mov src1 into dst if AVX is not supported.
void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8);
......
......@@ -2483,13 +2483,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64F64x2ConvertLowI32x4U: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
// dst = [ src_low, 0x43300000, src_high, 0x4330000 ];
// 0x43300000'00000000 is a special double where the significand bits
// precisely represents all uint32 numbers.
__ Unpcklps(
dst, __ ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
dst, src,
__ ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
__ Subpd(dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
......
......@@ -10,7 +10,9 @@
#include "src/base/platform/wrappers.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/machine-type.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/backend/instruction.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
......@@ -2931,7 +2933,6 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_UNOP_LIST(V) \
V(F64x2Sqrt) \
V(F64x2ConvertLowI32x4S) \
V(F64x2ConvertLowI32x4U) \
V(F64x2PromoteLowF32x4) \
V(F32x4SConvertI32x4) \
V(F32x4Abs) \
......@@ -3726,6 +3727,13 @@ void InstructionSelector::VisitI8x16Popcnt(Node* node) {
arraysize(temps), temps);
}
// Selects the instruction for F64x2ConvertLowI32x4U. The SSE codegen uses
// the destructive unpcklps, which requires the output register to alias the
// first input; with AVX any output register is acceptable.
void InstructionSelector::VisitF64x2ConvertLowI32x4U(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand dst;
  if (IsSupported(AVX)) {
    dst = g.DefineAsRegister(node);
  } else {
    // No AVX: force dst == first input for the destructive SSE encoding.
    dst = g.DefineSameAsFirst(node);
  }
  Emit(kX64F64x2ConvertLowI32x4U, dst, g.UseRegister(node->InputAt(0)));
}
void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
X64OperandGenerator g(this);
if (CpuFeatures::IsSupported(AVX)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment