Commit 8861ca7b authored by Yujie Wang, committed by Yahan Lu

[riscv64] Add RISCV64 support for wasm-relaxed-simd

- Implement `kRiscvF32x4RecipApprox`, `kRiscvF32x4RecipSqrtApprox`,
  `kRiscvF32x4Qfma`, `kRiscvF32x4Qfms`, `kRiscvF64x2Qfma` and `kRiscvF64x2Qfms`
  in `code-generator-riscv64.cc`

- Reuse lane-select, min-max and trunc instructions in
  `instruction-selector-riscv64.cc`

Bug: v8:11976
Change-Id: I8566f7e082a3d7071ec9fc64c742da82425a4d4d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3364077
Reviewed-by: Yahan Lu <yahan@iscas.ac.cn>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#78524}
parent c0cec07e
...@@ -982,6 +982,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -982,6 +982,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
DEFINE_VFUNARY(vfclass_v, VFUNARY1_FUNCT6, VFCLASS_V) DEFINE_VFUNARY(vfclass_v, VFUNARY1_FUNCT6, VFCLASS_V)
DEFINE_VFUNARY(vfsqrt_v, VFUNARY1_FUNCT6, VFSQRT_V) DEFINE_VFUNARY(vfsqrt_v, VFUNARY1_FUNCT6, VFSQRT_V)
DEFINE_VFUNARY(vfrsqrt7_v, VFUNARY1_FUNCT6, VFRSQRT7_V)
DEFINE_VFUNARY(vfrec7_v, VFUNARY1_FUNCT6, VFREC7_V)
#undef DEFINE_VFUNARY #undef DEFINE_VFUNARY
void vnot_vv(VRegister dst, VRegister src, MaskType mask = NoMask) { void vnot_vv(VRegister dst, VRegister src, MaskType mask = NoMask) {
......
...@@ -957,7 +957,7 @@ enum Opcode : uint32_t { ...@@ -957,7 +957,7 @@ enum Opcode : uint32_t {
VFCLASS_V = 0b10000, VFCLASS_V = 0b10000,
VFSQRT_V = 0b00000, VFSQRT_V = 0b00000,
VFSQRT7_V = 0b00100, VFRSQRT7_V = 0b00100,
VFREC7_V = 0b00101, VFREC7_V = 0b00101,
VFADD_FUNCT6 = 0b000000, VFADD_FUNCT6 = 0b000000,
......
...@@ -2785,16 +2785,18 @@ void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); } ...@@ -2785,16 +2785,18 @@ void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 #endif // !V8_TARGET_ARCH_ARM64
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64 && \
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32 !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_RISCV64
void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
// && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32 &&
// !V8_TARGET_ARCH_RISCV64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 && \
!V8_TARGET_ARCH_RISCV64
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) { void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
...@@ -2824,6 +2826,7 @@ void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) { ...@@ -2824,6 +2826,7 @@ void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
// && !V8_TARGET_ARCH_RISCV64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
...@@ -2981,6 +2981,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2981,6 +2981,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
break; break;
} }
case kRiscvF64x2Qfma: {
// Lowers F64x2 Qfma to a single vfmadd_vv under the E64/m1 vector setting.
// Per the RVV spec, vfmadd.vv computes vd = (vs1 * vd) + vs2; assuming this
// assembler helper takes (vd, vs1, vs2), the result is in2 * in1 + in0
// -- TODO confirm the operand order against the assembler declaration.
// The result is produced in InputSimd128Register(1) and then copied to the
// output register.
// NOTE(review): this appears to overwrite input register 1 in place; confirm
// that the instruction selector's operand constraints make that safe.
__ VU.set(kScratchReg, E64, m1);
__ vfmadd_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
i.InputSimd128Register(0));
__ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kRiscvF64x2Qfms: {
// Lowers F64x2 Qfms to a single vfnmsub_vv under the E64/m1 vector setting.
// Per the RVV spec, vfnmsub.vv computes vd = -(vs1 * vd) + vs2; assuming
// this assembler helper takes (vd, vs1, vs2), the result is
// in0 - in2 * in1 -- TODO confirm the operand order.
// The result is produced in InputSimd128Register(1) and then copied to the
// output register.
// NOTE(review): this appears to overwrite input register 1 in place; confirm
// that the instruction selector's operand constraints make that safe.
__ VU.set(kScratchReg, E64, m1);
__ vfnmsub_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
i.InputSimd128Register(0));
__ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kRiscvF32x4ExtractLane: { case kRiscvF32x4ExtractLane: {
__ VU.set(kScratchReg, E32, m1); __ VU.set(kScratchReg, E32, m1);
__ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0), __ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
...@@ -3155,6 +3169,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3155,6 +3169,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg); __ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
break; break;
} }
case kRiscvF32x4RecipApprox: {
// Approximate reciprocal: emits vfrec7_v from input 0 straight into the
// output register, under the E32/m1 vector setting.
__ VU.set(kScratchReg, E32, m1);
__ vfrec7_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kRiscvF32x4RecipSqrtApprox: {
// Approximate reciprocal square root: emits vfrsqrt7_v from input 0 straight
// into the output register, under the E32/m1 vector setting.
__ VU.set(kScratchReg, E32, m1);
__ vfrsqrt7_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kRiscvF32x4Qfma: {
// Lowers F32x4 Qfma to a single vfmadd_vv under the E32/m1 vector setting.
// Per the RVV spec, vfmadd.vv computes vd = (vs1 * vd) + vs2; assuming this
// assembler helper takes (vd, vs1, vs2), the result is in2 * in1 + in0
// -- TODO confirm the operand order against the assembler declaration.
// The result is produced in InputSimd128Register(1) and then copied to the
// output register.
// NOTE(review): this appears to overwrite input register 1 in place; confirm
// that the instruction selector's operand constraints make that safe.
__ VU.set(kScratchReg, E32, m1);
__ vfmadd_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
i.InputSimd128Register(0));
__ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kRiscvF32x4Qfms: {
// Lowers F32x4 Qfms to a single vfnmsub_vv under the E32/m1 vector setting.
// Per the RVV spec, vfnmsub.vv computes vd = -(vs1 * vd) + vs2; assuming
// this assembler helper takes (vd, vs1, vs2), the result is
// in0 - in2 * in1 -- TODO confirm the operand order.
// The result is produced in InputSimd128Register(1) and then copied to the
// output register.
// NOTE(review): this appears to overwrite input register 1 in place; confirm
// that the instruction selector's operand constraints make that safe.
__ VU.set(kScratchReg, E32, m1);
__ vfnmsub_vv(i.InputSimd128Register(1), i.InputSimd128Register(2),
i.InputSimd128Register(0));
__ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kRiscvI64x2SConvertI32x4Low: { case kRiscvI64x2SConvertI32x4Low: {
__ VU.set(kScratchReg, E64, m1); __ VU.set(kScratchReg, E64, m1);
__ vmv_vv(kSimd128ScratchReg, i.InputSimd128Register(0)); __ vmv_vv(kSimd128ScratchReg, i.InputSimd128Register(0));
......
...@@ -237,6 +237,10 @@ namespace compiler { ...@@ -237,6 +237,10 @@ namespace compiler {
V(RiscvF32x4Sqrt) \ V(RiscvF32x4Sqrt) \
V(RiscvF32x4RecipApprox) \ V(RiscvF32x4RecipApprox) \
V(RiscvF32x4RecipSqrtApprox) \ V(RiscvF32x4RecipSqrtApprox) \
V(RiscvF32x4Qfma) \
V(RiscvF32x4Qfms) \
V(RiscvF64x2Qfma) \
V(RiscvF64x2Qfms) \
V(RiscvF32x4Add) \ V(RiscvF32x4Add) \
V(RiscvF32x4Sub) \ V(RiscvF32x4Sub) \
V(RiscvF32x4Mul) \ V(RiscvF32x4Mul) \
......
...@@ -121,6 +121,10 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -121,6 +121,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kRiscvF32x4Sqrt: case kRiscvF32x4Sqrt:
case kRiscvF32x4RecipApprox: case kRiscvF32x4RecipApprox:
case kRiscvF32x4RecipSqrtApprox: case kRiscvF32x4RecipSqrtApprox:
case kRiscvF64x2Qfma:
case kRiscvF64x2Qfms:
case kRiscvF32x4Qfma:
case kRiscvF32x4Qfms:
case kRiscvF32x4ReplaceLane: case kRiscvF32x4ReplaceLane:
case kRiscvF32x4SConvertI32x4: case kRiscvF32x4SConvertI32x4:
case kRiscvF32x4Splat: case kRiscvF32x4Splat:
......
...@@ -2829,6 +2829,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -2829,6 +2829,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F32x4Floor, kRiscvF32x4Floor) \ V(F32x4Floor, kRiscvF32x4Floor) \
V(F32x4Trunc, kRiscvF32x4Trunc) \ V(F32x4Trunc, kRiscvF32x4Trunc) \
V(F32x4NearestInt, kRiscvF32x4NearestInt) \ V(F32x4NearestInt, kRiscvF32x4NearestInt) \
V(I32x4RelaxedTruncF32x4S, kRiscvI32x4SConvertF32x4) \
V(I32x4RelaxedTruncF32x4U, kRiscvI32x4UConvertF32x4) \
V(I32x4RelaxedTruncF64x2SZero, kRiscvI32x4TruncSatF64x2SZero) \
V(I32x4RelaxedTruncF64x2UZero, kRiscvI32x4TruncSatF64x2UZero) \
V(I64x2SConvertI32x4Low, kRiscvI64x2SConvertI32x4Low) \ V(I64x2SConvertI32x4Low, kRiscvI64x2SConvertI32x4Low) \
V(I64x2SConvertI32x4High, kRiscvI64x2SConvertI32x4High) \ V(I64x2SConvertI32x4High, kRiscvI64x2SConvertI32x4High) \
V(I64x2UConvertI32x4Low, kRiscvI64x2UConvertI32x4Low) \ V(I64x2UConvertI32x4Low, kRiscvI64x2UConvertI32x4Low) \
...@@ -2904,6 +2908,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -2904,6 +2908,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F32x4Ne, kRiscvF32x4Ne) \ V(F32x4Ne, kRiscvF32x4Ne) \
V(F32x4Lt, kRiscvF32x4Lt) \ V(F32x4Lt, kRiscvF32x4Lt) \
V(F32x4Le, kRiscvF32x4Le) \ V(F32x4Le, kRiscvF32x4Le) \
V(F32x4RelaxedMin, kRiscvF32x4Min) \
V(F32x4RelaxedMax, kRiscvF32x4Max) \
V(F64x2RelaxedMin, kRiscvF64x2Min) \
V(F64x2RelaxedMax, kRiscvF64x2Max) \
V(I32x4Add, kRiscvI32x4Add) \ V(I32x4Add, kRiscvI32x4Add) \
V(I32x4Sub, kRiscvI32x4Sub) \ V(I32x4Sub, kRiscvI32x4Sub) \
V(I32x4Mul, kRiscvI32x4Mul) \ V(I32x4Mul, kRiscvI32x4Mul) \
...@@ -3042,6 +3050,26 @@ void InstructionSelector::VisitS128Select(Node* node) { ...@@ -3042,6 +3050,26 @@ void InstructionSelector::VisitS128Select(Node* node) {
VisitRRRR(this, kRiscvS128Select, node); VisitRRRR(this, kRiscvS128Select, node);
} }
// Defines InstructionSelector::Visit<Name> for each relaxed lane-select
// operation listed below. Every generated visitor forwards to VisitRRRR with
// kRiscvS128Select, i.e. the same opcode that VisitS128Select emits, so all
// four lane widths share one lowering.
#define SIMD_VISIT_SELECT_LANE(Name) \
void InstructionSelector::Visit##Name(Node* node) { \
VisitRRRR(this, kRiscvS128Select, node); \
}
SIMD_VISIT_SELECT_LANE(I8x16RelaxedLaneSelect)
SIMD_VISIT_SELECT_LANE(I16x8RelaxedLaneSelect)
SIMD_VISIT_SELECT_LANE(I32x4RelaxedLaneSelect)
SIMD_VISIT_SELECT_LANE(I64x2RelaxedLaneSelect)
#undef SIMD_VISIT_SELECT_LANE
// Defines InstructionSelector::Visit<Name> for the Qfma/Qfms operations
// listed below. Each generated visitor forwards to VisitRRRR with the
// RISC-V opcode given as `instruction`.
#define VISIT_SIMD_QFMOP(Name, instruction) \
void InstructionSelector::Visit##Name(Node* node) { \
VisitRRRR(this, instruction, node); \
}
VISIT_SIMD_QFMOP(F64x2Qfma, kRiscvF64x2Qfma)
VISIT_SIMD_QFMOP(F64x2Qfms, kRiscvF64x2Qfms)
VISIT_SIMD_QFMOP(F32x4Qfma, kRiscvF32x4Qfma)
VISIT_SIMD_QFMOP(F32x4Qfms, kRiscvF32x4Qfms)
#undef VISIT_SIMD_QFMOP
void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { void InstructionSelector::VisitI32x4DotI16x8S(Node* node) {
RiscvOperandGenerator g(this); RiscvOperandGenerator g(this);
InstructionOperand temp = g.TempFpRegister(v14); InstructionOperand temp = g.TempFpRegister(v14);
......
...@@ -2404,6 +2404,12 @@ void Decoder::DecodeRvvFVV(Instruction* instr) { ...@@ -2404,6 +2404,12 @@ void Decoder::DecodeRvvFVV(Instruction* instr) {
case VFSQRT_V: case VFSQRT_V:
Format(instr, "vfsqrt.v 'vd, 'vs2'vm"); Format(instr, "vfsqrt.v 'vd, 'vs2'vm");
break; break;
case VFRSQRT7_V:
Format(instr, "vfrsqrt7.v 'vd, 'vs2'vm");
break;
case VFREC7_V:
Format(instr, "vfrec7.v 'vd, 'vs2'vm");
break;
default: default:
break; break;
} }
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
#include <stdlib.h> #include <stdlib.h>
#include "src/base/bits.h" #include "src/base/bits.h"
#include "src/base/overflowing-math.h"
#include "src/base/vector.h" #include "src/base/vector.h"
#include "src/codegen/assembler-inl.h" #include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler.h" #include "src/codegen/macro-assembler.h"
...@@ -6057,6 +6058,30 @@ void Simulator::DecodeRvvFVV() { ...@@ -6057,6 +6058,30 @@ void Simulator::DecodeRvvFVV() {
USE(fs1); USE(fs1);
}) })
break; break;
case VFRSQRT7_V:
// Simulates vfrsqrt7.v by assigning base::RecipSqrt(vs2) to vd in both
// non-empty loop bodies; the first macro argument is an empty block.
// NOTE(review): the two identical bodies are presumably the per-element-width
// variants of RVV_VI_VFP_VF_LOOP -- confirm against the macro definition.
// fs1 is not needed by this unary operation; USE(fs1) only references it.
RVV_VI_VFP_VF_LOOP(
{},
{
vd = base::RecipSqrt(vs2);
USE(fs1);
},
{
vd = base::RecipSqrt(vs2);
USE(fs1);
})
break;
case VFREC7_V:
// Simulates vfrec7.v by assigning base::Recip(vs2) to vd in both non-empty
// loop bodies; the first macro argument is an empty block.
// NOTE(review): the two identical bodies are presumably the per-element-width
// variants of RVV_VI_VFP_VF_LOOP -- confirm against the macro definition.
// fs1 is not needed by this unary operation; USE(fs1) only references it.
RVV_VI_VFP_VF_LOOP(
{},
{
vd = base::Recip(vs2);
USE(fs1);
},
{
vd = base::Recip(vs2);
USE(fs1);
})
break;
default: default:
break; break;
} }
......
...@@ -627,6 +627,11 @@ TEST(RVV) { ...@@ -627,6 +627,11 @@ TEST(RVV) {
COMPARE(vfirst_m(a5, v17), "4318a7d7 vfirst.m a5, v17"); COMPARE(vfirst_m(a5, v17), "4318a7d7 vfirst.m a5, v17");
COMPARE(vcpop_m(a5, v17), "431827d7 vcpop.m a5, v17"); COMPARE(vcpop_m(a5, v17), "431827d7 vcpop.m a5, v17");
COMPARE(vfsqrt_v(v17, v28), "4fc018d7 vfsqrt.v v17, v28")
COMPARE(vfrsqrt7_v(v17, v28), "4fc218d7 vfrsqrt7.v v17, v28")
COMPARE(vfrec7_v(v17, v28), "4fc298d7 vfrec7.v v17, v28")
COMPARE(vfclass_v(v17, v28), "4fc818d7 vfclass.v v17, v28")
VERIFY_RUN(); VERIFY_RUN();
} }
......
...@@ -34,7 +34,7 @@ namespace test_run_wasm_relaxed_simd { ...@@ -34,7 +34,7 @@ namespace test_run_wasm_relaxed_simd {
void RunWasm_##name##_Impl(TestExecutionTier execution_tier) void RunWasm_##name##_Impl(TestExecutionTier execution_tier)
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \ #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_RISCV64
// Only used for qfma and qfms tests below. // Only used for qfma and qfms tests below.
// FMOperation holds the params (a, b, c) for a Multiply-Add or // FMOperation holds the params (a, b, c) for a Multiply-Add or
...@@ -122,10 +122,10 @@ bool ExpectFused(TestExecutionTier tier) { ...@@ -122,10 +122,10 @@ bool ExpectFused(TestExecutionTier tier) {
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
// V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 // V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_RISCV64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \ #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_RISCV64
WASM_RELAXED_SIMD_TEST(F32x4Qfma) { WASM_RELAXED_SIMD_TEST(F32x4Qfma) {
WasmRunner<int32_t, float, float, float> r(execution_tier); WasmRunner<int32_t, float, float, float> r(execution_tier);
// Set up global to hold mask output. // Set up global to hold mask output.
...@@ -222,7 +222,7 @@ WASM_RELAXED_SIMD_TEST(F64x2Qfms) { ...@@ -222,7 +222,7 @@ WASM_RELAXED_SIMD_TEST(F64x2Qfms) {
} }
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
// V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 // V8_TARGET_ARCH_PPC64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_RISCV64
WASM_RELAXED_SIMD_TEST(F32x4RecipApprox) { WASM_RELAXED_SIMD_TEST(F32x4RecipApprox) {
RunF32x4UnOpTest(execution_tier, kExprF32x4RecipApprox, base::Recip, RunF32x4UnOpTest(execution_tier, kExprF32x4RecipApprox, base::Recip,
...@@ -234,7 +234,8 @@ WASM_RELAXED_SIMD_TEST(F32x4RecipSqrtApprox) { ...@@ -234,7 +234,8 @@ WASM_RELAXED_SIMD_TEST(F32x4RecipSqrtApprox) {
false /* !exact */); false /* !exact */);
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
V8_TARGET_ARCH_RISCV64
namespace { namespace {
// Helper to convert an array of T into an array of uint8_t to be used a v128 // Helper to convert an array of T into an array of uint8_t to be used a v128
// constants. // constants.
...@@ -407,7 +408,8 @@ WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) { ...@@ -407,7 +408,8 @@ WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
CHECK_EQ(LANE(dst, i), i); CHECK_EQ(LANE(dst, i), i);
} }
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 ||
// V8_TARGET_ARCH_RISCV64
#undef WASM_RELAXED_SIMD_TEST #undef WASM_RELAXED_SIMD_TEST
} // namespace test_run_wasm_relaxed_simd } // namespace test_run_wasm_relaxed_simd
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment