[riscv64] Add RISCV64 support for wasm-relaxed-simd

- Implement `kRiscvF32x4RecipApprox`, `kRiscvF32x4RecipSqrtApprox`, `kRiscvF32x4Qfma`, `kRiscvF32x4Qfms`, `kRiscvF64x2Qfma` and `kRiscvF64x2Qfms` in `code-generator-riscv64.cc` - Reuse lane-select, min-max and trunc instructions in `instruction-selector-riscv64.cc` Bug: v8:11976 Change-Id: I8566f7e082a3d7071ec9fc64c742da82425a4d4d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3364077 Reviewed-by: Yahan Lu <yahan@iscas.ac.cn> Reviewed-by: Clemens Backes <clemensb@chromium.org> Reviewed-by: Tobias Tebbi <tebbi@chromium.org> Cr-Commit-Position: refs/heads/main@{#78524}

[riscv64] Add RISCV64 support for wasm-relaxed-simd
- Implement `kRiscvF32x4RecipApprox`, `kRiscvF32x4RecipSqrtApprox`, `kRiscvF32x4Qfma`, `kRiscvF32x4Qfms`, `kRiscvF64x2Qfma` and `kRiscvF64x2Qfms` in `code-generator-riscv64.cc` - Reuse lane-select, min-max and trunc instructions in `instruction-selector-riscv64.cc` Bug: v8:11976 Change-Id: I8566f7e082a3d7071ec9fc64c742da82425a4d4d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3364077 Reviewed-by: Yahan Lu <yahan@iscas.ac.cn> Reviewed-by: Clemens Backes <clemensb@chromium.org> Reviewed-by: Tobias Tebbi <tebbi@chromium.org> Cr-Commit-Position: refs/heads/main@{#78524}
8861ca7b · Yujie Wang · Yahan Lu · c0cec07e · 8861ca7b · 8861ca7b
Commit 8861ca7b authored Jan 06, 2022 by Yujie Wang Committed by Yahan Lu Jan 10, 2022
11 changed files
--- a/src/codegen/riscv64/assembler-riscv64.h
+++ b/src/codegen/riscv64/assembler-riscv64.h
@@ -982,6 +982,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  DEFINE_VFUNARY(vfclass_v, VFUNARY1_FUNCT6, VFCLASS_V)
  DEFINE_VFUNARY(vfsqrt_v, VFUNARY1_FUNCT6, VFSQRT_V)
+  DEFINE_VFUNARY(vfrsqrt7_v, VFUNARY1_FUNCT6, VFRSQRT7_V)
+  DEFINE_VFUNARY(vfrec7_v, VFUNARY1_FUNCT6, VFREC7_V)
 #undef DEFINE_VFUNARY
  void vnot_vv(VRegister dst, VRegister src, MaskType mask = NoMask) {

--- a/src/codegen/riscv64/constants-riscv64.h
+++ b/src/codegen/riscv64/constants-riscv64.h
@@ -957,7 +957,7 @@ enum Opcode : uint32_t {
  VFCLASS_V = 0b10000,
  VFSQRT_V = 0b00000,
-  VFSQRT7_V = 0b00100,
+  VFRSQRT7_V = 0b00100,
  VFREC7_V = 0b00101,
  VFADD_FUNCT6 = 0b000000,

--- a/src/compiler/backend/instruction-selector.cc
+++ b/src/compiler/backend/instruction-selector.cc
@@ -2785,16 +2785,18 @@ void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM64
 #endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
-#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64 && \
-#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
+    !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_RISCV64
 void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
 #endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
+        // && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32 &&
+        // !V8_TARGET_ARCH_RISCV64
-#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 && \
+    !V8_TARGET_ARCH_RISCV64
 void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
  UNIMPLEMENTED();
 }
@@ -2824,6 +2826,7 @@ void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
  UNIMPLEMENTED();
 }
 #endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
+        // && !V8_TARGET_ARCH_RISCV64
 void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

--- a/src/compiler/backend/riscv64/code-generator-riscv64.cc
+++ b/src/compiler/backend/riscv64/code-generator-riscv64.cc
@@ -2981,6 +2981,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      }
      break;
    }
+    case kRiscvF64x2Qfma: {
+      __ VU.set(kScratchReg, E64, m1);
+      __ vfmadd_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
+                   i.InputSimd128Register(0));
+      __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kRiscvF64x2Qfms: {
+      __ VU.set(kScratchReg, E64, m1);
+      __ vfnmsub_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
+                    i.InputSimd128Register(0));
+      __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
    case kRiscvF32x4ExtractLane: {
      __ VU.set(kScratchReg, E32, m1);
      __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
@@ -3155,6 +3169,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
      break;
    }
+    case kRiscvF32x4RecipApprox: {
+      __ VU.set(kScratchReg, E32, m1);
+      __ vfrec7_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kRiscvF32x4RecipSqrtApprox: {
+      __ VU.set(kScratchReg, E32, m1);
+      __ vfrsqrt7_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kRiscvF32x4Qfma: {
+      __ VU.set(kScratchReg, E32, m1);
+      __ vfmadd_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
+                   i.InputSimd128Register(0));
+      __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kRiscvF32x4Qfms: {
+      __ VU.set(kScratchReg, E32, m1);
+      __ vfnmsub_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
+                    i.InputSimd128Register(0));
+      __ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
    case kRiscvI64x2SConvertI32x4Low: {
      __ VU.set(kScratchReg, E64, m1);
      __ vmv_vv(kSimd128ScratchReg, i.InputSimd128Register(0));

--- a/src/compiler/backend/riscv64/instruction-codes-riscv64.h
+++ b/src/compiler/backend/riscv64/instruction-codes-riscv64.h
@@ -237,6 +237,10 @@ namespace compiler {
  V(RiscvF32x4Sqrt)                         \
  V(RiscvF32x4RecipApprox)                  \
  V(RiscvF32x4RecipSqrtApprox)              \
+  V(RiscvF32x4Qfma)                         \
+  V(RiscvF32x4Qfms)                         \
+  V(RiscvF64x2Qfma)                         \
+  V(RiscvF64x2Qfms)                         \
  V(RiscvF32x4Add)                          \
  V(RiscvF32x4Sub)                          \
  V(RiscvF32x4Mul)                          \

--- a/src/compiler/backend/riscv64/instruction-scheduler-riscv64.cc
+++ b/src/compiler/backend/riscv64/instruction-scheduler-riscv64.cc
@@ -121,6 +121,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kRiscvF32x4Sqrt:
    case kRiscvF32x4RecipApprox:
    case kRiscvF32x4RecipSqrtApprox:
+    case kRiscvF64x2Qfma:
+    case kRiscvF64x2Qfms:
+    case kRiscvF32x4Qfma:
+    case kRiscvF32x4Qfms:
    case kRiscvF32x4ReplaceLane:
    case kRiscvF32x4SConvertI32x4:
    case kRiscvF32x4Splat:

--- a/src/compiler/backend/riscv64/instruction-selector-riscv64.cc
+++ b/src/compiler/backend/riscv64/instruction-selector-riscv64.cc
@@ -2803,63 +2803,67 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
  V(I16x8)                \
  V(I8x16)
-#define SIMD_UNOP_LIST(V)                                   \
+#define SIMD_UNOP_LIST(V)                                       \
-  V(F64x2Abs, kRiscvF64x2Abs)                               \
+  V(F64x2Abs, kRiscvF64x2Abs)                                   \
-  V(F64x2Neg, kRiscvF64x2Neg)                               \
+  V(F64x2Neg, kRiscvF64x2Neg)                                   \
-  V(F64x2Sqrt, kRiscvF64x2Sqrt)                             \
+  V(F64x2Sqrt, kRiscvF64x2Sqrt)                                 \
-  V(F64x2ConvertLowI32x4S, kRiscvF64x2ConvertLowI32x4S)     \
+  V(F64x2ConvertLowI32x4S, kRiscvF64x2ConvertLowI32x4S)         \
-  V(F64x2ConvertLowI32x4U, kRiscvF64x2ConvertLowI32x4U)     \
+  V(F64x2ConvertLowI32x4U, kRiscvF64x2ConvertLowI32x4U)         \
-  V(F64x2PromoteLowF32x4, kRiscvF64x2PromoteLowF32x4)       \
+  V(F64x2PromoteLowF32x4, kRiscvF64x2PromoteLowF32x4)           \
-  V(F64x2Ceil, kRiscvF64x2Ceil)                             \
+  V(F64x2Ceil, kRiscvF64x2Ceil)                                 \
-  V(F64x2Floor, kRiscvF64x2Floor)                           \
+  V(F64x2Floor, kRiscvF64x2Floor)                               \
-  V(F64x2Trunc, kRiscvF64x2Trunc)                           \
+  V(F64x2Trunc, kRiscvF64x2Trunc)                               \
-  V(F64x2NearestInt, kRiscvF64x2NearestInt)                 \
+  V(F64x2NearestInt, kRiscvF64x2NearestInt)                     \
-  V(I64x2Neg, kRiscvI64x2Neg)                               \
+  V(I64x2Neg, kRiscvI64x2Neg)                                   \
-  V(I64x2Abs, kRiscvI64x2Abs)                               \
+  V(I64x2Abs, kRiscvI64x2Abs)                                   \
-  V(I64x2BitMask, kRiscvI64x2BitMask)                       \
+  V(I64x2BitMask, kRiscvI64x2BitMask)                           \
-  V(F32x4SConvertI32x4, kRiscvF32x4SConvertI32x4)           \
+  V(F32x4SConvertI32x4, kRiscvF32x4SConvertI32x4)               \
-  V(F32x4UConvertI32x4, kRiscvF32x4UConvertI32x4)           \
+  V(F32x4UConvertI32x4, kRiscvF32x4UConvertI32x4)               \
-  V(F32x4Abs, kRiscvF32x4Abs)                               \
+  V(F32x4Abs, kRiscvF32x4Abs)                                   \
-  V(F32x4Neg, kRiscvF32x4Neg)                               \
+  V(F32x4Neg, kRiscvF32x4Neg)                                   \
-  V(F32x4Sqrt, kRiscvF32x4Sqrt)                             \
+  V(F32x4Sqrt, kRiscvF32x4Sqrt)                                 \
-  V(F32x4RecipApprox, kRiscvF32x4RecipApprox)               \
+  V(F32x4RecipApprox, kRiscvF32x4RecipApprox)                   \
-  V(F32x4RecipSqrtApprox, kRiscvF32x4RecipSqrtApprox)       \
+  V(F32x4RecipSqrtApprox, kRiscvF32x4RecipSqrtApprox)           \
-  V(F32x4DemoteF64x2Zero, kRiscvF32x4DemoteF64x2Zero)       \
+  V(F32x4DemoteF64x2Zero, kRiscvF32x4DemoteF64x2Zero)           \
-  V(F32x4Ceil, kRiscvF32x4Ceil)                             \
+  V(F32x4Ceil, kRiscvF32x4Ceil)                                 \
-  V(F32x4Floor, kRiscvF32x4Floor)                           \
+  V(F32x4Floor, kRiscvF32x4Floor)                               \
-  V(F32x4Trunc, kRiscvF32x4Trunc)                           \
+  V(F32x4Trunc, kRiscvF32x4Trunc)                               \
-  V(F32x4NearestInt, kRiscvF32x4NearestInt)                 \
+  V(F32x4NearestInt, kRiscvF32x4NearestInt)                     \
-  V(I64x2SConvertI32x4Low, kRiscvI64x2SConvertI32x4Low)     \
+  V(I32x4RelaxedTruncF32x4S, kRiscvI32x4SConvertF32x4)          \
-  V(I64x2SConvertI32x4High, kRiscvI64x2SConvertI32x4High)   \
+  V(I32x4RelaxedTruncF32x4U, kRiscvI32x4UConvertF32x4)          \
-  V(I64x2UConvertI32x4Low, kRiscvI64x2UConvertI32x4Low)     \
+  V(I32x4RelaxedTruncF64x2SZero, kRiscvI32x4TruncSatF64x2SZero) \
-  V(I64x2UConvertI32x4High, kRiscvI64x2UConvertI32x4High)   \
+  V(I32x4RelaxedTruncF64x2UZero, kRiscvI32x4TruncSatF64x2UZero) \
-  V(I32x4SConvertF32x4, kRiscvI32x4SConvertF32x4)           \
+  V(I64x2SConvertI32x4Low, kRiscvI64x2SConvertI32x4Low)         \
-  V(I32x4UConvertF32x4, kRiscvI32x4UConvertF32x4)           \
+  V(I64x2SConvertI32x4High, kRiscvI64x2SConvertI32x4High)       \
-  V(I32x4Neg, kRiscvI32x4Neg)                               \
+  V(I64x2UConvertI32x4Low, kRiscvI64x2UConvertI32x4Low)         \
-  V(I32x4SConvertI16x8Low, kRiscvI32x4SConvertI16x8Low)     \
+  V(I64x2UConvertI32x4High, kRiscvI64x2UConvertI32x4High)       \
-  V(I32x4SConvertI16x8High, kRiscvI32x4SConvertI16x8High)   \
+  V(I32x4SConvertF32x4, kRiscvI32x4SConvertF32x4)               \
-  V(I32x4UConvertI16x8Low, kRiscvI32x4UConvertI16x8Low)     \
+  V(I32x4UConvertF32x4, kRiscvI32x4UConvertF32x4)               \
-  V(I32x4UConvertI16x8High, kRiscvI32x4UConvertI16x8High)   \
+  V(I32x4Neg, kRiscvI32x4Neg)                                   \
-  V(I32x4Abs, kRiscvI32x4Abs)                               \
+  V(I32x4SConvertI16x8Low, kRiscvI32x4SConvertI16x8Low)         \
-  V(I32x4BitMask, kRiscvI32x4BitMask)                       \
+  V(I32x4SConvertI16x8High, kRiscvI32x4SConvertI16x8High)       \
-  V(I32x4TruncSatF64x2SZero, kRiscvI32x4TruncSatF64x2SZero) \
+  V(I32x4UConvertI16x8Low, kRiscvI32x4UConvertI16x8Low)         \
-  V(I32x4TruncSatF64x2UZero, kRiscvI32x4TruncSatF64x2UZero) \
+  V(I32x4UConvertI16x8High, kRiscvI32x4UConvertI16x8High)       \
-  V(I16x8Neg, kRiscvI16x8Neg)                               \
+  V(I32x4Abs, kRiscvI32x4Abs)                                   \
-  V(I16x8SConvertI8x16Low, kRiscvI16x8SConvertI8x16Low)     \
+  V(I32x4BitMask, kRiscvI32x4BitMask)                           \
-  V(I16x8SConvertI8x16High, kRiscvI16x8SConvertI8x16High)   \
+  V(I32x4TruncSatF64x2SZero, kRiscvI32x4TruncSatF64x2SZero)     \
-  V(I16x8UConvertI8x16Low, kRiscvI16x8UConvertI8x16Low)     \
+  V(I32x4TruncSatF64x2UZero, kRiscvI32x4TruncSatF64x2UZero)     \
-  V(I16x8UConvertI8x16High, kRiscvI16x8UConvertI8x16High)   \
+  V(I16x8Neg, kRiscvI16x8Neg)                                   \
-  V(I16x8Abs, kRiscvI16x8Abs)                               \
+  V(I16x8SConvertI8x16Low, kRiscvI16x8SConvertI8x16Low)         \
-  V(I16x8BitMask, kRiscvI16x8BitMask)                       \
+  V(I16x8SConvertI8x16High, kRiscvI16x8SConvertI8x16High)       \
-  V(I8x16Neg, kRiscvI8x16Neg)                               \
+  V(I16x8UConvertI8x16Low, kRiscvI16x8UConvertI8x16Low)         \
-  V(I8x16Abs, kRiscvI8x16Abs)                               \
+  V(I16x8UConvertI8x16High, kRiscvI16x8UConvertI8x16High)       \
-  V(I8x16BitMask, kRiscvI8x16BitMask)                       \
+  V(I16x8Abs, kRiscvI16x8Abs)                                   \
-  V(I8x16Popcnt, kRiscvI8x16Popcnt)                         \
+  V(I16x8BitMask, kRiscvI16x8BitMask)                           \
-  V(S128Not, kRiscvS128Not)                                 \
+  V(I8x16Neg, kRiscvI8x16Neg)                                   \
-  V(V128AnyTrue, kRiscvV128AnyTrue)                         \
+  V(I8x16Abs, kRiscvI8x16Abs)                                   \
-  V(I32x4AllTrue, kRiscvI32x4AllTrue)                       \
+  V(I8x16BitMask, kRiscvI8x16BitMask)                           \
-  V(I16x8AllTrue, kRiscvI16x8AllTrue)                       \
+  V(I8x16Popcnt, kRiscvI8x16Popcnt)                             \
-  V(I8x16AllTrue, kRiscvI8x16AllTrue)                       \
+  V(S128Not, kRiscvS128Not)                                     \
+  V(V128AnyTrue, kRiscvV128AnyTrue)                             \
+  V(I32x4AllTrue, kRiscvI32x4AllTrue)                           \
+  V(I16x8AllTrue, kRiscvI16x8AllTrue)                           \
+  V(I8x16AllTrue, kRiscvI8x16AllTrue)                           \
  V(I64x2AllTrue, kRiscvI64x2AllTrue)
 #define SIMD_SHIFT_OP_LIST(V) \
@@ -2904,6 +2908,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
  V(F32x4Ne, kRiscvF32x4Ne)                             \
  V(F32x4Lt, kRiscvF32x4Lt)                             \
  V(F32x4Le, kRiscvF32x4Le)                             \
+  V(F32x4RelaxedMin, kRiscvF32x4Min)                    \
+  V(F32x4RelaxedMax, kRiscvF32x4Max)                    \
+  V(F64x2RelaxedMin, kRiscvF64x2Min)                    \
+  V(F64x2RelaxedMax, kRiscvF64x2Max)                    \
  V(I32x4Add, kRiscvI32x4Add)                           \
  V(I32x4Sub, kRiscvI32x4Sub)                           \
  V(I32x4Mul, kRiscvI32x4Mul)                           \
@@ -3042,6 +3050,26 @@ void InstructionSelector::VisitS128Select(Node* node) {
  VisitRRRR(this, kRiscvS128Select, node);
 }
+#define SIMD_VISIT_SELECT_LANE(Name)                  \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRRRR(this, kRiscvS128Select, node);          \
+  }
+SIMD_VISIT_SELECT_LANE(I8x16RelaxedLaneSelect)
+SIMD_VISIT_SELECT_LANE(I16x8RelaxedLaneSelect)
+SIMD_VISIT_SELECT_LANE(I32x4RelaxedLaneSelect)
+SIMD_VISIT_SELECT_LANE(I64x2RelaxedLaneSelect)
+#undef SIMD_VISIT_SELECT_LANE
+#define VISIT_SIMD_QFMOP(Name, instruction)           \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRRRR(this, instruction, node);               \
+  }
+VISIT_SIMD_QFMOP(F64x2Qfma, kRiscvF64x2Qfma)
+VISIT_SIMD_QFMOP(F64x2Qfms, kRiscvF64x2Qfms)
+VISIT_SIMD_QFMOP(F32x4Qfma, kRiscvF32x4Qfma)
+VISIT_SIMD_QFMOP(F32x4Qfms, kRiscvF32x4Qfms)
+#undef VISIT_SIMD_QFMOP
 void InstructionSelector::VisitI32x4DotI16x8S(Node* node) {
  RiscvOperandGenerator g(this);
  InstructionOperand temp = g.TempFpRegister(v14);

--- a/src/diagnostics/riscv64/disasm-riscv64.cc
+++ b/src/diagnostics/riscv64/disasm-riscv64.cc
@@ -2404,6 +2404,12 @@ void Decoder::DecodeRvvFVV(Instruction* instr) {
        case VFSQRT_V:
          Format(instr, "vfsqrt.v  'vd, 'vs2'vm");
          break;
+        case VFRSQRT7_V:
+          Format(instr, "vfrsqrt7.v 'vd, 'vs2'vm");
+          break;
+        case VFREC7_V:
+          Format(instr, "vfrec7.v  'vd, 'vs2'vm");
+          break;
        default:
          break;
      }

--- a/src/execution/riscv64/simulator-riscv64.cc
+++ b/src/execution/riscv64/simulator-riscv64.cc
@@ -51,6 +51,7 @@
 #include <stdlib.h>
 #include "src/base/bits.h"
+#include "src/base/overflowing-math.h"
 #include "src/base/vector.h"
 #include "src/codegen/assembler-inl.h"
 #include "src/codegen/macro-assembler.h"
@@ -6057,6 +6058,30 @@ void Simulator::DecodeRvvFVV() {
                               USE(fs1);
                             })
          break;
+        case VFRSQRT7_V:
+          RVV_VI_VFP_VF_LOOP(
+              {},
+              {
+                vd = base::RecipSqrt(vs2);
+                USE(fs1);
+              },
+              {
+                vd = base::RecipSqrt(vs2);
+                USE(fs1);
+              })
+          break;
+        case VFREC7_V:
+          RVV_VI_VFP_VF_LOOP(
+              {},
+              {
+                vd = base::Recip(vs2);
+                USE(fs1);
+              },
+              {
+                vd = base::Recip(vs2);
+                USE(fs1);
+              })
+          break;
        default:
          break;
      }

--- a/test/cctest/test-disasm-riscv64.cc
+++ b/test/cctest/test-disasm-riscv64.cc
@@ -627,6 +627,11 @@ TEST(RVV) {
  COMPARE(vfirst_m(a5, v17), "4318a7d7       vfirst.m  a5, v17");
  COMPARE(vcpop_m(a5, v17), "431827d7       vcpop.m   a5, v17");
+  COMPARE(vfsqrt_v(v17, v28), "4fc018d7       vfsqrt.v  v17, v28")
+  COMPARE(vfrsqrt7_v(v17, v28), "4fc218d7       vfrsqrt7.v v17, v28")
+  COMPARE(vfrec7_v(v17, v28), "4fc298d7       vfrec7.v  v17, v28")
+  COMPARE(vfclass_v(v17, v28), "4fc818d7       vfclass.v  v17, v28")
  VERIFY_RUN();
 }

--- a/test/cctest/wasm/test-run-wasm-relaxed-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-relaxed-simd.cc
@@ -34,7 +34,7 @@ namespace test_run_wasm_relaxed_simd {
  void RunWasm_##name##_Impl(TestExecutionTier execution_tier)
 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
-    V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32
+    V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_RISCV64
 // Only used for qfma and qfms tests below.
 // FMOperation holds the params (a, b, c) for a Multiply-Add or
@@ -122,10 +122,10 @@ bool ExpectFused(TestExecutionTier tier) {
 #endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
 }
 #endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
-        // V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32
+        // V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_RISCV64
 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
-    V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32
+    V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_RISCV64
 WASM_RELAXED_SIMD_TEST(F32x4Qfma) {
  WasmRunner<int32_t, float, float, float> r(execution_tier);
  // Set up global to hold mask output.
@@ -222,7 +222,7 @@ WASM_RELAXED_SIMD_TEST(F64x2Qfms) {
  }
 }
 #endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
-        // V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32
+        // V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_RISCV64
 WASM_RELAXED_SIMD_TEST(F32x4RecipApprox) {
  RunF32x4UnOpTest(execution_tier, kExprF32x4RecipApprox, base::Recip,
@@ -234,7 +234,8 @@ WASM_RELAXED_SIMD_TEST(F32x4RecipSqrtApprox) {
                   false /* !exact */);
 }
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
+    V8_TARGET_ARCH_RISCV64
 namespace {
 // Helper to convert an array of T into an array of uint8_t to be used a v128
 // constants.
@@ -407,7 +408,8 @@ WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
    CHECK_EQ(LANE(dst, i), i);
  }
 }
-#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 ||
+        // V8_TARGET_ARCH_RISCV64
 #undef WASM_RELAXED_SIMD_TEST
 }  // namespace test_run_wasm_relaxed_simd