MIPS[64]: Implement 3R MSA instructions in simulator

Bug: Change-Id: Ia7bbbb2aba28ec36b470a1b23ebe38fc3a09e600 Reviewed-on: https://chromium-review.googlesource.com/657757 Commit-Queue: Ivica Bogosavljevic <ivica.bogosavljevic@imgtec.com> Reviewed-by: Miran Karić <Miran.Karic@imgtec.com> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Cr-Commit-Position: refs/heads/master@{#48108}

MIPS[64]: Implement 3R MSA instructions in simulator
Bug: Change-Id: Ia7bbbb2aba28ec36b470a1b23ebe38fc3a09e600 Reviewed-on: https://chromium-review.googlesource.com/657757 Commit-Queue: Ivica Bogosavljevic <ivica.bogosavljevic@imgtec.com> Reviewed-by: Miran Karić <Miran.Karic@imgtec.com> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Cr-Commit-Position: refs/heads/master@{#48108}
01e3be50 · Ivica Bogosavljevic · Commit Bot · a76d0a77 · 01e3be50 · 01e3be50
Commit 01e3be50 authored Sep 21, 2017 by Ivica Bogosavljevic Committed by Commit Bot Sep 21, 2017
7 changed files
--- a/src/mips/simulator-mips.cc
+++ b/src/mips/simulator-mips.cc
@@ -4897,53 +4897,213 @@ void Simulator::DecodeTypeMsaMI10() {
 #undef MSA_MI10_STORE
 }

-void Simulator::DecodeTypeMsa3R() {
-  DCHECK(IsMipsArchVariant(kMips32r6));
-  DCHECK(CpuFeatures::IsSupported(MIPS_SIMD));
-  uint32_t opcode = instr_.InstructionBits() & kMsa3RMask;
+template <typename T>
+T Simulator::Msa3RInstrHelper(uint32_t opcode, T wd, T ws, T wt) {
+  typedef typename std::make_unsigned<T>::type uT;
+  T res;
+  T wt_modulo = wt % (sizeof(T) * 8);
  switch (opcode) {
    case SLL_MSA:
+      res = static_cast<T>(ws << wt_modulo);
+      break;
    case SRA_MSA:
+      res = static_cast<T>(ArithmeticShiftRight(ws, wt_modulo));
+      break;
    case SRL_MSA:
+      res = static_cast<T>(static_cast<uT>(ws) >> wt_modulo);
+      break;
    case BCLR:
+      res = static_cast<T>(static_cast<T>(~(1ull << wt_modulo)) & ws);
+      break;
    case BSET:
+      res = static_cast<T>(static_cast<T>(1ull << wt_modulo) | ws);
+      break;
    case BNEG:
-    case BINSL:
-    case BINSR:
+      res = static_cast<T>(static_cast<T>(1ull << wt_modulo) ^ ws);
+      break;
+    case BINSL: {
+      int elem_size = 8 * sizeof(T);
+      int bits = wt_modulo + 1;
+      if (bits == elem_size) {
+        res = static_cast<T>(ws);
+      } else {
+        uint64_t mask = ((1ull << bits) - 1) << (elem_size - bits);
+        res = static_cast<T>((static_cast<T>(mask) & ws) |
+                             (static_cast<T>(~mask) & wd));
+      }
+    } break;
+    case BINSR: {
+      int elem_size = 8 * sizeof(T);
+      int bits = wt_modulo + 1;
+      if (bits == elem_size) {
+        res = static_cast<T>(ws);
+      } else {
+        uint64_t mask = (1ull << bits) - 1;
+        res = static_cast<T>((static_cast<T>(mask) & ws) |
+                             (static_cast<T>(~mask) & wd));
+      }
+    } break;
    case ADDV:
+      res = ws + wt;
+      break;
    case SUBV:
+      res = ws - wt;
+      break;
    case MAX_S:
+      res = Max(ws, wt);
+      break;
    case MAX_U:
+      res = static_cast<T>(Max(static_cast<uT>(ws), static_cast<uT>(wt)));
+      break;
    case MIN_S:
+      res = Min(ws, wt);
+      break;
    case MIN_U:
+      res = static_cast<T>(Min(static_cast<uT>(ws), static_cast<uT>(wt)));
+      break;
    case MAX_A:
+      // We use negative abs in order to avoid problems
+      // with corner case for MIN_INT
+      res = Nabs(ws) < Nabs(wt) ? ws : wt;
+      break;
    case MIN_A:
+      // We use negative abs in order to avoid problems
+      // with corner case for MIN_INT
+      res = Nabs(ws) > Nabs(wt) ? ws : wt;
+      break;
    case CEQ:
+      res = static_cast<T>(!Compare(ws, wt) ? -1ull : 0ull);
+      break;
    case CLT_S:
+      res = static_cast<T>((Compare(ws, wt) == -1) ? -1ull : 0ull);
+      break;
    case CLT_U:
+      res = static_cast<T>(
+          (Compare(static_cast<uT>(ws), static_cast<uT>(wt)) == -1) ? -1ull
+                                                                    : 0ull);
+      break;
    case CLE_S:
+      res = static_cast<T>((Compare(ws, wt) != 1) ? -1ull : 0ull);
+      break;
    case CLE_U:
+      res = static_cast<T>(
+          (Compare(static_cast<uT>(ws), static_cast<uT>(wt)) != 1) ? -1ull
+                                                                   : 0ull);
+      break;
    case ADD_A:
-    case ADDS_A:
+      res = static_cast<T>(Abs(ws) + Abs(wt));
+      break;
+    case ADDS_A: {
+      T ws_nabs = Nabs(ws);
+      T wt_nabs = Nabs(wt);
+      if (ws_nabs < -std::numeric_limits<T>::max() - wt_nabs) {
+        res = std::numeric_limits<T>::max();
+      } else {
+        res = -(ws_nabs + wt_nabs);
+      }
+    } break;
    case ADDS_S:
-    case ADDS_U:
+      res = SaturateAdd(ws, wt);
+      break;
+    case ADDS_U: {
+      uT ws_u = static_cast<uT>(ws);
+      uT wt_u = static_cast<uT>(wt);
+      res = static_cast<T>(SaturateAdd(ws_u, wt_u));
+    } break;
    case AVE_S:
-    case AVE_U:
+      res = static_cast<T>((wt & ws) + ((wt ^ ws) >> 1));
+      break;
+    case AVE_U: {
+      uT ws_u = static_cast<uT>(ws);
+      uT wt_u = static_cast<uT>(wt);
+      res = static_cast<T>((wt_u & ws_u) + ((wt_u ^ ws_u) >> 1));
+    } break;
    case AVER_S:
-    case AVER_U:
+      res = static_cast<T>((wt | ws) - ((wt ^ ws) >> 1));
+      break;
+    case AVER_U: {
+      uT ws_u = static_cast<uT>(ws);
+      uT wt_u = static_cast<uT>(wt);
+      res = static_cast<T>((wt_u | ws_u) - ((wt_u ^ ws_u) >> 1));
+    } break;
    case SUBS_S:
-    case SUBS_U:
-    case SUBSUS_U:
-    case SUBSUU_S:
+      res = SaturateSub(ws, wt);
+      break;
+    case SUBS_U: {
+      uT ws_u = static_cast<uT>(ws);
+      uT wt_u = static_cast<uT>(wt);
+      res = static_cast<T>(SaturateSub(ws_u, wt_u));
+    } break;
+    case SUBSUS_U: {
+      uT wsu = static_cast<uT>(ws);
+      if (wt > 0) {
+        uT wtu = static_cast<uT>(wt);
+        if (wtu > wsu) {
+          res = 0;
+        } else {
+          res = static_cast<T>(wsu - wtu);
+        }
+      } else {
+        if (wsu > std::numeric_limits<uT>::max() + wt) {
+          res = static_cast<T>(std::numeric_limits<uT>::max());
+        } else {
+          res = static_cast<T>(wsu - wt);
+        }
+      }
+    } break;
+    case SUBSUU_S: {
+      uT wsu = static_cast<uT>(ws);
+      uT wtu = static_cast<uT>(wt);
+      uT wdu;
+      if (wsu > wtu) {
+        wdu = wsu - wtu;
+        if (wdu > std::numeric_limits<T>::max()) {
+          res = std::numeric_limits<T>::max();
+        } else {
+          res = static_cast<T>(wdu);
+        }
+      } else {
+        wdu = wtu - wsu;
+        CHECK(-std::numeric_limits<T>::max() ==
+              std::numeric_limits<T>::min() + 1);
+        if (wdu <= std::numeric_limits<T>::max()) {
+          res = -static_cast<T>(wdu);
+        } else {
+          res = std::numeric_limits<T>::min();
+        }
+      }
+    } break;
    case ASUB_S:
-    case ASUB_U:
+      res = static_cast<T>(Abs(ws - wt));
+      break;
+    case ASUB_U: {
+      uT wsu = static_cast<uT>(ws);
+      uT wtu = static_cast<uT>(wt);
+      res = static_cast<T>(wsu > wtu ? wsu - wtu : wtu - wsu);
+    } break;
    case MULV:
+      res = ws * wt;
+      break;
    case MADDV:
+      res = wd + ws * wt;
+      break;
    case MSUBV:
+      res = wd - ws * wt;
+      break;
    case DIV_S_MSA:
+      res = wt != 0 ? ws / wt : static_cast<T>(Unpredictable);
+      break;
    case DIV_U:
+      res = wt != 0 ? static_cast<T>(static_cast<uT>(ws) / static_cast<uT>(wt))
+                    : static_cast<T>(Unpredictable);
+      break;
    case MOD_S:
+      res = wt != 0 ? ws % wt : static_cast<T>(Unpredictable);
+      break;
    case MOD_U:
+      res = wt != 0 ? static_cast<T>(static_cast<uT>(ws) % static_cast<uT>(wt))
+                    : static_cast<T>(Unpredictable);
+      break;
    case DOTP_S:
    case DOTP_U:
    case DPADD_S:
@@ -4959,8 +5119,17 @@ void Simulator::DecodeTypeMsa3R() {
    case ILVEV:
    case ILVOD:
    case VSHF:
-    case SRAR:
-    case SRLR:
+      UNIMPLEMENTED();
+      break;
+    case SRAR: {
+      int bit = wt_modulo == 0 ? 0 : (ws >> (wt_modulo - 1)) & 1;
+      res = static_cast<T>(ArithmeticShiftRight(ws, wt_modulo) + bit);
+    } break;
+    case SRLR: {
+      uT wsu = static_cast<uT>(ws);
+      int bit = wt_modulo == 0 ? 0 : (wsu >> (wt_modulo - 1)) & 1;
+      res = static_cast<T>((wsu >> wt_modulo) + bit);
+    } break;
    case HADD_S:
    case HADD_U:
    case HSUB_S:
@@ -4970,9 +5139,45 @@ void Simulator::DecodeTypeMsa3R() {
    default:
      UNREACHABLE();
  }
-}
+  return res;
+  }

-void Simulator::DecodeTypeMsa3RF() {
+  void Simulator::DecodeTypeMsa3R() {
+    DCHECK(IsMipsArchVariant(kMips32r6));
+    DCHECK(CpuFeatures::IsSupported(MIPS_SIMD));
+    uint32_t opcode = instr_.InstructionBits() & kMsa3RMask;
+    msa_reg_t ws, wd, wt;
+
+#define MSA_3R_DF(elem, num_of_lanes)                                          \
+  get_msa_register(instr_.WdValue(), wd.elem);                                 \
+  get_msa_register(instr_.WsValue(), ws.elem);                                 \
+  get_msa_register(instr_.WtValue(), wt.elem);                                 \
+  for (int i = 0; i < num_of_lanes; i++) {                                     \
+    wd.elem[i] = Msa3RInstrHelper(opcode, wd.elem[i], ws.elem[i], wt.elem[i]); \
+  }                                                                            \
+  set_msa_register(instr_.WdValue(), wd.elem);                                 \
+  TraceMSARegWr(wd.elem);
+
+    switch (DecodeMsaDataFormat()) {
+      case MSA_BYTE:
+        MSA_3R_DF(b, kMSALanesByte);
+        break;
+      case MSA_HALF:
+        MSA_3R_DF(h, kMSALanesHalf);
+        break;
+      case MSA_WORD:
+        MSA_3R_DF(w, kMSALanesWord);
+        break;
+      case MSA_DWORD:
+        MSA_3R_DF(d, kMSALanesDword);
+        break;
+      default:
+        UNREACHABLE();
+    }
+#undef MSA_3R_DF
+  }
+
+  void Simulator::DecodeTypeMsa3RF() {
    DCHECK(IsMipsArchVariant(kMips32r6));
    DCHECK(CpuFeatures::IsSupported(MIPS_SIMD));
    uint32_t opcode = instr_.InstructionBits() & kMsa3RFMask;
@@ -5023,9 +5228,9 @@ void Simulator::DecodeTypeMsa3RF() {
      default:
        UNREACHABLE();
    }
-}
+  }

-void Simulator::DecodeTypeMsaVec() {
+  void Simulator::DecodeTypeMsaVec() {
    DCHECK(IsMipsArchVariant(kMips32r6));
    DCHECK(CpuFeatures::IsSupported(MIPS_SIMD));
    uint32_t opcode = instr_.InstructionBits() & kMsaVECMask;
@@ -5066,9 +5271,9 @@ void Simulator::DecodeTypeMsaVec() {
    }
    set_msa_register(instr_.WdValue(), wd.w);
    TraceMSARegWr(wd.d);
-}
+  }

-void Simulator::DecodeTypeMsa2R() {
+  void Simulator::DecodeTypeMsa2R() {
    DCHECK(IsMipsArchVariant(kMips32r6));
    DCHECK(CpuFeatures::IsSupported(MIPS_SIMD));
    uint32_t opcode = instr_.InstructionBits() & kMsa2RMask;

--- a/src/mips/simulator-mips.h
+++ b/src/mips/simulator-mips.h
@@ -436,6 +436,8 @@ class Simulator {
  T MsaI5InstrHelper(uint32_t opcode, T ws, int32_t i5);
  template <typename T>
  T MsaBitInstrHelper(uint32_t opcode, T wd, T ws, int32_t m);
+  template <typename T>
+  T Msa3RInstrHelper(uint32_t opcode, T wd, T ws, T wt);

  inline int32_t rs_reg() const { return instr_.RsValue(); }
  inline int32_t rs() const { return get_register(rs_reg()); }

--- a/src/mips64/simulator-mips64.cc
+++ b/src/mips64/simulator-mips64.cc
@@ -5122,53 +5122,213 @@ void Simulator::DecodeTypeMsaMI10() {
 #undef MSA_MI10_STORE
 }

-void Simulator::DecodeTypeMsa3R() {
-  DCHECK(kArchVariant == kMips64r6);
-  DCHECK(CpuFeatures::IsSupported(MIPS_SIMD));
-  uint32_t opcode = instr_.InstructionBits() & kMsa3RMask;
+template <typename T>
+T Simulator::Msa3RInstrHelper(uint32_t opcode, T wd, T ws, T wt) {
+  typedef typename std::make_unsigned<T>::type uT;
+  T res;
+  int wt_modulo = wt % (sizeof(T) * 8);
  switch (opcode) {
    case SLL_MSA:
+      res = static_cast<T>(ws << wt_modulo);
+      break;
    case SRA_MSA:
+      res = static_cast<T>(ArithmeticShiftRight(ws, wt_modulo));
+      break;
    case SRL_MSA:
+      res = static_cast<T>(static_cast<uT>(ws) >> wt_modulo);
+      break;
    case BCLR:
+      res = static_cast<T>(static_cast<T>(~(1ull << wt_modulo)) & ws);
+      break;
    case BSET:
+      res = static_cast<T>(static_cast<T>(1ull << wt_modulo) | ws);
+      break;
    case BNEG:
-    case BINSL:
-    case BINSR:
+      res = static_cast<T>(static_cast<T>(1ull << wt_modulo) ^ ws);
+      break;
+    case BINSL: {
+      int elem_size = 8 * sizeof(T);
+      int bits = wt_modulo + 1;
+      if (bits == elem_size) {
+        res = static_cast<T>(ws);
+      } else {
+        uint64_t mask = ((1ull << bits) - 1) << (elem_size - bits);
+        res = static_cast<T>((static_cast<T>(mask) & ws) |
+                             (static_cast<T>(~mask) & wd));
+      }
+    } break;
+    case BINSR: {
+      int elem_size = 8 * sizeof(T);
+      int bits = wt_modulo + 1;
+      if (bits == elem_size) {
+        res = static_cast<T>(ws);
+      } else {
+        uint64_t mask = (1ull << bits) - 1;
+        res = static_cast<T>((static_cast<T>(mask) & ws) |
+                             (static_cast<T>(~mask) & wd));
+      }
+    } break;
    case ADDV:
+      res = ws + wt;
+      break;
    case SUBV:
+      res = ws - wt;
+      break;
    case MAX_S:
+      res = Max(ws, wt);
+      break;
    case MAX_U:
+      res = static_cast<T>(Max(static_cast<uT>(ws), static_cast<uT>(wt)));
+      break;
    case MIN_S:
+      res = Min(ws, wt);
+      break;
    case MIN_U:
+      res = static_cast<T>(Min(static_cast<uT>(ws), static_cast<uT>(wt)));
+      break;
    case MAX_A:
+      // We use negative abs in order to avoid problems
+      // with corner case for MIN_INT
+      res = Nabs(ws) < Nabs(wt) ? ws : wt;
+      break;
    case MIN_A:
+      // We use negative abs in order to avoid problems
+      // with corner case for MIN_INT
+      res = Nabs(ws) > Nabs(wt) ? ws : wt;
+      break;
    case CEQ:
+      res = static_cast<T>(!Compare(ws, wt) ? -1ull : 0ull);
+      break;
    case CLT_S:
+      res = static_cast<T>((Compare(ws, wt) == -1) ? -1ull : 0ull);
+      break;
    case CLT_U:
+      res = static_cast<T>(
+          (Compare(static_cast<uT>(ws), static_cast<uT>(wt)) == -1) ? -1ull
+                                                                    : 0ull);
+      break;
    case CLE_S:
+      res = static_cast<T>((Compare(ws, wt) != 1) ? -1ull : 0ull);
+      break;
    case CLE_U:
+      res = static_cast<T>(
+          (Compare(static_cast<uT>(ws), static_cast<uT>(wt)) != 1) ? -1ull
+                                                                   : 0ull);
+      break;
    case ADD_A:
-    case ADDS_A:
+      res = static_cast<T>(Abs(ws) + Abs(wt));
+      break;
+    case ADDS_A: {
+      T ws_nabs = Nabs(ws);
+      T wt_nabs = Nabs(wt);
+      if (ws_nabs < -std::numeric_limits<T>::max() - wt_nabs) {
+        res = std::numeric_limits<T>::max();
+      } else {
+        res = -(ws_nabs + wt_nabs);
+      }
+    } break;
    case ADDS_S:
-    case ADDS_U:
+      res = SaturateAdd(ws, wt);
+      break;
+    case ADDS_U: {
+      uT ws_u = static_cast<uT>(ws);
+      uT wt_u = static_cast<uT>(wt);
+      res = static_cast<T>(SaturateAdd(ws_u, wt_u));
+    } break;
    case AVE_S:
-    case AVE_U:
+      res = static_cast<T>((wt & ws) + ((wt ^ ws) >> 1));
+      break;
+    case AVE_U: {
+      uT ws_u = static_cast<uT>(ws);
+      uT wt_u = static_cast<uT>(wt);
+      res = static_cast<T>((wt_u & ws_u) + ((wt_u ^ ws_u) >> 1));
+    } break;
    case AVER_S:
-    case AVER_U:
+      res = static_cast<T>((wt | ws) - ((wt ^ ws) >> 1));
+      break;
+    case AVER_U: {
+      uT ws_u = static_cast<uT>(ws);
+      uT wt_u = static_cast<uT>(wt);
+      res = static_cast<T>((wt_u | ws_u) - ((wt_u ^ ws_u) >> 1));
+    } break;
    case SUBS_S:
-    case SUBS_U:
-    case SUBSUS_U:
-    case SUBSUU_S:
+      res = SaturateSub(ws, wt);
+      break;
+    case SUBS_U: {
+      uT ws_u = static_cast<uT>(ws);
+      uT wt_u = static_cast<uT>(wt);
+      res = static_cast<T>(SaturateSub(ws_u, wt_u));
+    } break;
+    case SUBSUS_U: {
+      uT wsu = static_cast<uT>(ws);
+      if (wt > 0) {
+        uT wtu = static_cast<uT>(wt);
+        if (wtu > wsu) {
+          res = 0;
+        } else {
+          res = static_cast<T>(wsu - wtu);
+        }
+      } else {
+        if (wsu > std::numeric_limits<uT>::max() + wt) {
+          res = static_cast<T>(std::numeric_limits<uT>::max());
+        } else {
+          res = static_cast<T>(wsu - wt);
+        }
+      }
+    } break;
+    case SUBSUU_S: {
+      uT wsu = static_cast<uT>(ws);
+      uT wtu = static_cast<uT>(wt);
+      uT wdu;
+      if (wsu > wtu) {
+        wdu = wsu - wtu;
+        if (wdu > std::numeric_limits<T>::max()) {
+          res = std::numeric_limits<T>::max();
+        } else {
+          res = static_cast<T>(wdu);
+        }
+      } else {
+        wdu = wtu - wsu;
+        CHECK(-std::numeric_limits<T>::max() ==
+              std::numeric_limits<T>::min() + 1);
+        if (wdu <= std::numeric_limits<T>::max()) {
+          res = -static_cast<T>(wdu);
+        } else {
+          res = std::numeric_limits<T>::min();
+        }
+      }
+    } break;
    case ASUB_S:
-    case ASUB_U:
+      res = static_cast<T>(Abs(ws - wt));
+      break;
+    case ASUB_U: {
+      uT wsu = static_cast<uT>(ws);
+      uT wtu = static_cast<uT>(wt);
+      res = static_cast<T>(wsu > wtu ? wsu - wtu : wtu - wsu);
+    } break;
    case MULV:
+      res = ws * wt;
+      break;
    case MADDV:
+      res = wd + ws * wt;
+      break;
    case MSUBV:
+      res = wd - ws * wt;
+      break;
    case DIV_S_MSA:
+      res = wt != 0 ? ws / wt : static_cast<T>(Unpredictable);
+      break;
    case DIV_U:
+      res = wt != 0 ? static_cast<T>(static_cast<uT>(ws) / static_cast<uT>(wt))
+                    : static_cast<T>(Unpredictable);
+      break;
    case MOD_S:
+      res = wt != 0 ? ws % wt : static_cast<T>(Unpredictable);
+      break;
    case MOD_U:
+      res = wt != 0 ? static_cast<T>(static_cast<uT>(ws) % static_cast<uT>(wt))
+                    : static_cast<T>(Unpredictable);
+      break;
    case DOTP_S:
    case DOTP_U:
    case DPADD_S:
@@ -5184,8 +5344,17 @@ void Simulator::DecodeTypeMsa3R() {
    case ILVEV:
    case ILVOD:
    case VSHF:
-    case SRAR:
-    case SRLR:
+      UNIMPLEMENTED();
+      break;
+    case SRAR: {
+      int bit = wt_modulo == 0 ? 0 : (ws >> (wt_modulo - 1)) & 1;
+      res = static_cast<T>(ArithmeticShiftRight(ws, wt_modulo) + bit);
+    } break;
+    case SRLR: {
+      uT wsu = static_cast<uT>(ws);
+      int bit = wt_modulo == 0 ? 0 : (wsu >> (wt_modulo - 1)) & 1;
+      res = static_cast<T>((wsu >> wt_modulo) + bit);
+    } break;
    case HADD_S:
    case HADD_U:
    case HSUB_S:
@@ -5195,6 +5364,42 @@ void Simulator::DecodeTypeMsa3R() {
    default:
      UNREACHABLE();
  }
+  return res;
+}
+
+void Simulator::DecodeTypeMsa3R() {
+  DCHECK(kArchVariant == kMips64r6);
+  DCHECK(CpuFeatures::IsSupported(MIPS_SIMD));
+  uint32_t opcode = instr_.InstructionBits() & kMsa3RMask;
+  msa_reg_t ws, wd, wt;
+
+#define MSA_3R_DF(elem, num_of_lanes)                                          \
+  get_msa_register(instr_.WdValue(), wd.elem);                                 \
+  get_msa_register(instr_.WsValue(), ws.elem);                                 \
+  get_msa_register(instr_.WtValue(), wt.elem);                                 \
+  for (int i = 0; i < num_of_lanes; i++) {                                     \
+    wd.elem[i] = Msa3RInstrHelper(opcode, wd.elem[i], ws.elem[i], wt.elem[i]); \
+  }                                                                            \
+  set_msa_register(instr_.WdValue(), wd.elem);                                 \
+  TraceMSARegWr(wd.elem);
+
+  switch (DecodeMsaDataFormat()) {
+    case MSA_BYTE:
+      MSA_3R_DF(b, kMSALanesByte);
+      break;
+    case MSA_HALF:
+      MSA_3R_DF(h, kMSALanesHalf);
+      break;
+    case MSA_WORD:
+      MSA_3R_DF(w, kMSALanesWord);
+      break;
+    case MSA_DWORD:
+      MSA_3R_DF(d, kMSALanesDword);
+      break;
+    default:
+      UNREACHABLE();
+  }
+#undef MSA_3R_DF
 }

 void Simulator::DecodeTypeMsa3RF() {

--- a/src/mips64/simulator-mips64.h
+++ b/src/mips64/simulator-mips64.h
@@ -452,6 +452,8 @@ class Simulator {
  T MsaI5InstrHelper(uint32_t opcode, T ws, int32_t i5);
  template <typename T>
  T MsaBitInstrHelper(uint32_t opcode, T wd, T ws, int32_t m);
+  template <typename T>
+  T Msa3RInstrHelper(uint32_t opcode, T wd, T ws, T wt);

  // Executing is handled based on the instruction type.
  void DecodeTypeRegister();

--- a/src/utils.h
+++ b/src/utils.h
@@ -196,6 +196,13 @@ typename std::make_unsigned<T>::type Abs(T a) {
  return (x ^ y) - y;
 }

+// Returns the negative absolute value of its argument.
+template <typename T,
+          typename = typename std::enable_if<std::is_signed<T>::value>::type>
+T Nabs(T a) {
+  return a < 0 ? a : -a;
+}
+
 // Floor(-0.0) == 0.0
 inline double Floor(double x) {
 #if V8_CC_MSVC
@@ -233,6 +240,47 @@ inline double Pow(double x, double y) {
  return std::pow(x, y);
 }

+template <typename T>
+T SaturateAdd(T a, T b) {
+  if (std::is_signed<T>::value) {
+    if (a > 0 && b > 0) {
+      if (a > std::numeric_limits<T>::max() - b) {
+        return std::numeric_limits<T>::max();
+      }
+    } else if (a < 0 && b < 0) {
+      if (a < std::numeric_limits<T>::min() - b) {
+        return std::numeric_limits<T>::min();
+      }
+    }
+  } else {
+    CHECK(std::is_unsigned<T>::value);
+    if (a > std::numeric_limits<T>::max() - b) {
+      return std::numeric_limits<T>::max();
+    }
+  }
+  return a + b;
+}
+
+template <typename T>
+T SaturateSub(T a, T b) {
+  if (std::is_signed<T>::value) {
+    if (a > 0 && b < 0) {
+      if (a > std::numeric_limits<T>::max() + b) {
+        return std::numeric_limits<T>::max();
+      }
+    } else if (a < 0 && b > 0) {
+      if (a < std::numeric_limits<T>::min() + b) {
+        return std::numeric_limits<T>::min();
+      }
+    }
+  } else {
+    CHECK(std::is_unsigned<T>::value);
+    if (a < b) {
+      return static_cast<T>(0);
+    }
+  }
+  return a - b;
+}

 // ----------------------------------------------------------------------------
 // BitField is a help template for encoding and decode bitfield with

--- a/test/cctest/test-assembler-mips.cc
+++ b/test/cctest/test-assembler-mips.cc
--- a/test/cctest/test-assembler-mips64.cc
+++ b/test/cctest/test-assembler-mips64.cc