Commit 857823fd authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][liftoff][ia32] Prototype load lane

Prototype load lane instructions on ia32 Liftoff.

We generalize the pinsr* macro-assembler functions to take an extra
input, following the 3-operand + 1-immediate form of the AVX
instructions.

Bug: v8:10975
Change-Id: I3fa10d149b011b62edd58372148446b663f3dc3c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2619417
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72179}
parent 5208063b
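
For context on the instruction forms involved: AVX `vpinsrb/vpinsrw/vpinsrd` take a separate source register, so `dst` gets the untouched lanes from `src1` without clobbering it, while the legacy SSE encodings modify `dst` in place. That is why the non-AVX paths in the diff below copy `src1` into `dst` first. A minimal scalar sketch of the new 3-operand contract (hypothetical helper, not V8 code):

```cpp
// Scalar model of the generalized Pinsr* contract: dst = src1 with the
// byte lane selected by imm8 replaced by src2. Illustrative names only.
#include <array>
#include <cassert>
#include <cstdint>

using XmmValue = std::array<uint8_t, 16>;  // one 128-bit register

XmmValue Pinsrb(const XmmValue& src1, uint8_t src2, int imm8) {
  XmmValue dst = src1;    // movdqu dst, src1 (elided when dst aliases src1)
  dst[imm8 & 15] = src2;  // pinsrb dst, src2, imm8 (destructive SSE form)
  return dst;             // AVX does all of this in a single vpinsrb
}

int main() {
  XmmValue v{};  // all zeroes
  XmmValue r = Pinsrb(v, 0xAB, 5);
  assert(r[5] == 0xAB);
  assert(r[0] == 0 && r[15] == 0);  // other lanes preserved
  return 0;
}
```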
@@ -1936,28 +1936,40 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
 }
 
 void TurboAssembler::Pinsrb(XMMRegister dst, Operand src, int8_t imm8) {
+  Pinsrb(dst, dst, src, imm8);
+}
+
+void TurboAssembler::Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2,
+                            int8_t imm8) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
-    vpinsrb(dst, dst, src, imm8);
+    vpinsrb(dst, src1, src2, imm8);
     return;
   }
   if (CpuFeatures::IsSupported(SSE4_1)) {
     CpuFeatureScope sse_scope(this, SSE4_1);
-    pinsrb(dst, src, imm8);
+    if (dst != src1) {
+      movdqu(dst, src1);
+    }
+    pinsrb(dst, src2, imm8);
     return;
   }
   FATAL("no AVX or SSE4.1 support");
 }
 
-void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
+void TurboAssembler::Pinsrd(XMMRegister dst, XMMRegister src1, Operand src2,
+                            uint8_t imm8) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
-    vpinsrd(dst, dst, src, imm8);
+    vpinsrd(dst, src1, src2, imm8);
     return;
   }
+  if (dst != src1) {
+    movdqu(dst, src1);
+  }
   if (CpuFeatures::IsSupported(SSE4_1)) {
     CpuFeatureScope sse_scope(this, SSE4_1);
-    pinsrd(dst, src, imm8);
+    pinsrd(dst, src2, imm8);
     return;
   }
   // Without AVX or SSE, we can only have 64-bit values in xmm registers.
@@ -1968,10 +1980,10 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
   // Write original content of {dst} to the stack.
   movsd(Operand(esp, 0), dst);
   // Overwrite the portion specified in {imm8}.
-  if (src.is_reg_only()) {
-    mov(Operand(esp, imm8 * kUInt32Size), src.reg());
+  if (src2.is_reg_only()) {
+    mov(Operand(esp, imm8 * kUInt32Size), src2.reg());
   } else {
-    movss(dst, src);
+    movss(dst, src2);
     movss(Operand(esp, imm8 * kUInt32Size), dst);
   }
   // Load back the full value into {dst}.
@@ -1979,13 +1991,25 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
   add(esp, Immediate(kDoubleSize));
 }
 
+void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, uint8_t imm8) {
+  Pinsrd(dst, dst, src, imm8);
+}
+
 void TurboAssembler::Pinsrw(XMMRegister dst, Operand src, int8_t imm8) {
+  Pinsrw(dst, dst, src, imm8);
+}
+
+void TurboAssembler::Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2,
+                            int8_t imm8) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
-    vpinsrw(dst, dst, src, imm8);
+    vpinsrw(dst, src1, src2, imm8);
     return;
   } else {
-    pinsrw(dst, src, imm8);
+    if (dst != src1) {
+      movdqu(dst, src1);
+    }
+    pinsrw(dst, src2, imm8);
     return;
   }
 }
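The SSE2-only `Pinsrd` path above cannot use `pinsrd` (it requires SSE4.1), so it round-trips through the stack: spill the register, overwrite the 32-bit slot selected by `imm8`, reload. A plain-C++ sketch of that round trip, assuming a little-endian target as on ia32 (illustrative only, not V8 code):

```cpp
// Scalar model of the no-SSE4.1 Pinsrd fallback: spill the 64-bit xmm
// value to the stack, patch one 32-bit lane, and reload.
#include <cassert>
#include <cstdint>
#include <cstring>

uint64_t PinsrdViaStack(uint64_t dst, uint32_t src2, int imm8) {
  uint8_t stack[8];                                   // sub esp, kDoubleSize
  std::memcpy(stack, &dst, sizeof dst);               // movsd [esp], dst
  std::memcpy(stack + imm8 * 4, &src2, sizeof src2);  // mov [esp+imm8*4], src2
  std::memcpy(&dst, stack, sizeof dst);               // movsd dst, [esp]
  return dst;                                         // add esp, kDoubleSize
}

int main() {
  uint64_t r = PinsrdViaStack(0, 0xDEADBEEFu, 1);
  assert(r == 0xDEADBEEF00000000ull);  // lane 1 is the high 32 bits
  return 0;
}
```

The header declarations that follow document the extra caveat of the new overloads: `src1` is moved to `dst` when AVX is unavailable.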
@@ -567,14 +567,20 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
     Pinsrb(dst, Operand(src), imm8);
   }
   void Pinsrb(XMMRegister dst, Operand src, int8_t imm8);
+  // Moves src1 to dst if AVX is not supported.
+  void Pinsrb(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8);
   void Pinsrd(XMMRegister dst, Register src, uint8_t imm8) {
     Pinsrd(dst, Operand(src), imm8);
   }
   void Pinsrd(XMMRegister dst, Operand src, uint8_t imm8);
+  // Moves src1 to dst if AVX is not supported.
+  void Pinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8);
   void Pinsrw(XMMRegister dst, Register src, int8_t imm8) {
     Pinsrw(dst, Operand(src), imm8);
   }
   void Pinsrw(XMMRegister dst, Operand src, int8_t imm8);
+  // Moves src1 to dst if AVX is not supported.
+  void Pinsrw(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8);
   void Vbroadcastss(XMMRegister dst, Operand src);
   void Extractps(Operand dst, XMMRegister src, uint8_t imm8);
@@ -2763,7 +2763,26 @@ void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
                                 Register addr, Register offset_reg,
                                 uintptr_t offset_imm, LoadType type,
                                 uint8_t laneidx, uint32_t* protected_load_pc) {
-  bailout(kSimd, "loadlane");
+  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
+  Operand src_op{addr, offset_reg, times_1, static_cast<int32_t>(offset_imm)};
+  *protected_load_pc = pc_offset();
+
+  MachineType mem_type = type.mem_type();
+  if (mem_type == MachineType::Int8()) {
+    Pinsrb(dst.fp(), src.fp(), src_op, laneidx);
+  } else if (mem_type == MachineType::Int16()) {
+    Pinsrw(dst.fp(), src.fp(), src_op, laneidx);
+  } else if (mem_type == MachineType::Int32()) {
+    Pinsrd(dst.fp(), src.fp(), src_op, laneidx);
+  } else {
+    DCHECK_EQ(MachineType::Int64(), mem_type);
+    if (laneidx == 0) {
+      Movlps(dst.fp(), src.fp(), src_op);
+    } else {
+      DCHECK_EQ(1, laneidx);
+      Movhps(dst.fp(), src.fp(), src_op);
+    }
+  }
 }
 
 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
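The new `LoadLane` body dispatches on the lane's memory type: 8-, 16-, and 32-bit lanes reuse the generalized `Pinsr*` helpers with a memory operand, while 64-bit lanes fall back to `Movlps` (lane 0) or `Movhps` (lane 1) because ia32 has no `pinsrq`. A scalar sketch of the resulting semantics (illustrative, not V8 code):

```cpp
// Scalar model of v128 load-lane as implemented above: copy src, then
// overwrite only the laneidx-th lane with a value loaded from memory.
#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

using V128 = std::array<uint8_t, 16>;

template <typename Lane>
V128 LoadLane(const V128& src, const Lane* mem, int laneidx) {
  V128 dst = src;  // all other lanes of src are preserved
  std::memcpy(dst.data() + laneidx * sizeof(Lane), mem, sizeof(Lane));
  return dst;
}

int main() {
  V128 v{};
  uint64_t hi = 0x1122334455667788ull;
  V128 r = LoadLane(v, &hi, 1);  // 64-bit lane 1: the Movhps case
  uint64_t out;
  std::memcpy(&out, r.data() + 8, sizeof out);
  assert(out == hi);
  return 0;
}
```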