PPC: [wasm-simd] Implement FP div, min and max operations

Change-Id: I0a3ac5e56504c7c99f94a29f30c3827e99718b7d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2423615
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#70066}

PPC: [wasm-simd] Implement FP div, min and max operations
Change-Id: I0a3ac5e56504c7c99f94a29f30c3827e99718b7d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2423615
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#70066}
aa55bdb6 · Milad Fa · Commit Bot · 92aaace1 · aa55bdb6 · aa55bdb6
Commit aa55bdb6 authored Sep 22, 2020 by Milad Fa Committed by Commit Bot Sep 22, 2020
5 changed files
--- a/src/codegen/ppc/constants-ppc.h
+++ b/src/codegen/ppc/constants-ppc.h
@@ -2344,7 +2344,11 @@ using Instr = uint32_t;
  /* Vector Average Unsigned Halfword */                   \
  V(vavguh, VAVGUH, 0x10000442)                            \
  /* Vector Logical AND with Complement */                 \
-  V(vandc, VANDC, 0x10000444)
+  V(vandc, VANDC, 0x10000444)                              \
+  /* Vector Minimum Single-Precision */                    \
+  V(vminfp, VMINFP, 0x1000044A)                            \
+  /* Vector Maximum Single-Precision */                    \
+  V(vmaxfp, VMAXFP, 0x1000040A)

 #define PPC_VX_OPCODE_C_FORM_LIST(V)       \
  /* Vector Unpack Low Signed Halfword */  \
@@ -2411,10 +2415,6 @@ using Instr = uint32_t;
  V(vgbbd, VGBBD, 0x1000050C)                                             \
  /* Vector Log Base 2 Estimate Single-Precision */                       \
  V(vlogefp, VLOGEFP, 0x100001CA)                                         \
-  /* Vector Maximum Single-Precision */                                   \
-  V(vmaxfp, VMAXFP, 0x1000040A)                                           \
-  /* Vector Minimum Single-Precision */                                   \
-  V(vminfp, VMINFP, 0x1000044A)                                           \
  /* Vector Merge High Byte */                                            \
  V(vmrghb, VMRGHB, 0x1000000C)                                           \
  /* Vector Merge High Halfword */                                        \

--- a/src/compiler/backend/ppc/code-generator-ppc.cc
+++ b/src/compiler/backend/ppc/code-generator-ppc.cc
@@ -3361,6 +3361,50 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ vandc(dst, src, i.InputSimd128Register(1));
      break;
    }
+    case kPPC_F64x2Div: {
+      __ xvdivdp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+#define F64X2_MIN_MAX_NAN(result)                                       \
+  Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));   \
+  __ xvcmpeqdp(tempFPReg1, i.InputSimd128Register(0),                   \
+               i.InputSimd128Register(0));                              \
+  __ vsel(result, i.InputSimd128Register(0), result, tempFPReg1);       \
+  __ xvcmpeqdp(tempFPReg1, i.InputSimd128Register(1),                   \
+               i.InputSimd128Register(1));                              \
+  __ vsel(i.OutputSimd128Register(), i.InputSimd128Register(1), result, \
+          tempFPReg1);
+    case kPPC_F64x2Min: {
+      __ xvmindp(kScratchDoubleReg, i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      // We need to check if an input is NAN and preserve it.
+      F64X2_MIN_MAX_NAN(kScratchDoubleReg)
+      break;
+    }
+    case kPPC_F64x2Max: {
+      __ xvmaxdp(kScratchDoubleReg, i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      // We need to check if an input is NAN and preserve it.
+      F64X2_MIN_MAX_NAN(kScratchDoubleReg)
+      break;
+    }
+#undef F64X2_MIN_MAX_NAN
+    case kPPC_F32x4Div: {
+      __ xvdivsp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F32x4Min: {
+      __ vminfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F32x4Max: {
+      __ vmaxfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
    case kPPC_StoreCompressTagged: {
      ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX);
      break;

--- a/src/compiler/backend/ppc/instruction-codes-ppc.h
+++ b/src/compiler/backend/ppc/instruction-codes-ppc.h
@@ -207,6 +207,9 @@ namespace compiler {
  V(PPC_F64x2Sqrt)                   \
  V(PPC_F64x2Qfma)                   \
  V(PPC_F64x2Qfms)                   \
+  V(PPC_F64x2Div)                    \
+  V(PPC_F64x2Min)                    \
+  V(PPC_F64x2Max)                    \
  V(PPC_F32x4Splat)                  \
  V(PPC_F32x4ExtractLane)            \
  V(PPC_F32x4ReplaceLane)            \
@@ -225,6 +228,9 @@ namespace compiler {
  V(PPC_F32x4Sqrt)                   \
  V(PPC_F32x4SConvertI32x4)          \
  V(PPC_F32x4UConvertI32x4)          \
+  V(PPC_F32x4Div)                    \
+  V(PPC_F32x4Min)                    \
+  V(PPC_F32x4Max)                    \
  V(PPC_I64x2Splat)                  \
  V(PPC_I64x2ExtractLane)            \
  V(PPC_I64x2ReplaceLane)            \

--- a/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
+++ b/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
@@ -130,6 +130,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kPPC_F64x2Sqrt:
    case kPPC_F64x2Qfma:
    case kPPC_F64x2Qfms:
+    case kPPC_F64x2Div:
+    case kPPC_F64x2Min:
+    case kPPC_F64x2Max:
    case kPPC_F32x4Splat:
    case kPPC_F32x4ExtractLane:
    case kPPC_F32x4ReplaceLane:
@@ -150,6 +153,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kPPC_F32x4UConvertI32x4:
    case kPPC_F32x4Qfma:
    case kPPC_F32x4Qfms:
+    case kPPC_F32x4Div:
+    case kPPC_F32x4Min:
+    case kPPC_F32x4Max:
    case kPPC_I64x2Splat:
    case kPPC_I64x2ExtractLane:
    case kPPC_I64x2ReplaceLane:

--- a/src/compiler/backend/ppc/instruction-selector-ppc.cc
+++ b/src/compiler/backend/ppc/instruction-selector-ppc.cc
@@ -2160,6 +2160,9 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
  V(F64x2Ne)               \
  V(F64x2Le)               \
  V(F64x2Lt)               \
+  V(F64x2Div)              \
+  V(F64x2Min)              \
+  V(F64x2Max)              \
  V(F32x4Add)              \
  V(F32x4AddHoriz)         \
  V(F32x4Sub)              \
@@ -2168,6 +2171,9 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
  V(F32x4Ne)               \
  V(F32x4Lt)               \
  V(F32x4Le)               \
+  V(F32x4Div)              \
+  V(F32x4Min)              \
+  V(F32x4Max)              \
  V(I64x2Add)              \
  V(I64x2Sub)              \
  V(I64x2Mul)              \
@@ -2452,18 +2458,6 @@ void InstructionSelector::EmitPrepareResults(
  }
 }

-void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4Max(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
-
 void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }

 void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }