Commit 4f06ec6d authored by Lu Yahan, committed by V8 LUCI CQ

[riscv64] Implement simd for liftoff

Bug: v8:11976

Change-Id: Ifdce8e668c4b0fe20180c8d28b9c1d4abe705a67
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3297354
Commit-Queue: ji qiu <qiuji@iscas.ac.cn>
Reviewed-by: ji qiu <qiuji@iscas.ac.cn>
Cr-Commit-Position: refs/heads/main@{#78078}
parent 53d9e8b1
......@@ -2461,6 +2461,27 @@ void Assembler::EBREAK() {
}
// RVV
void Assembler::vredmaxu_vs(VRegister vd, VRegister vs2, VRegister vs1,
MaskType mask) {
GenInstrV(VREDMAXU_FUNCT6, OP_MVV, vd, vs1, vs2, mask);
}
void Assembler::vredmax_vs(VRegister vd, VRegister vs2, VRegister vs1,
MaskType mask) {
GenInstrV(VREDMAX_FUNCT6, OP_MVV, vd, vs1, vs2, mask);
}
void Assembler::vredmin_vs(VRegister vd, VRegister vs2, VRegister vs1,
MaskType mask) {
GenInstrV(VREDMIN_FUNCT6, OP_MVV, vd, vs1, vs2, mask);
}
void Assembler::vredminu_vs(VRegister vd, VRegister vs2, VRegister vs1,
MaskType mask) {
GenInstrV(VREDMINU_FUNCT6, OP_MVV, vd, vs1, vs2, mask);
}
void Assembler::vmv_vv(VRegister vd, VRegister vs1) {
GenInstrV(VMV_FUNCT6, OP_IVV, vd, vs1, v0, NoMask);
}
......@@ -2536,6 +2557,11 @@ void Assembler::vrgather_vx(VRegister vd, VRegister vs2, Register rs1,
GenInstrV(VRGATHER_FUNCT6, OP_IVX, vd, rs1, vs2, mask);
}
void Assembler::vwaddu_wx(VRegister vd, VRegister vs2, Register rs1,
MaskType mask) {
GenInstrV(VWADDUW_FUNCT6, OP_MVX, vd, rs1, vs2, mask);
}
#define DEFINE_OPIVV(name, funct6) \
void Assembler::name##_vv(VRegister vd, VRegister vs2, VRegister vs1, \
MaskType mask) { \
......@@ -2548,6 +2574,12 @@ void Assembler::vrgather_vx(VRegister vd, VRegister vs2, Register rs1,
GenInstrV(funct6, OP_FVV, vd, vs1, vs2, mask); \
}
#define DEFINE_OPFRED(name, funct6) \
void Assembler::name##_vs(VRegister vd, VRegister vs2, VRegister vs1, \
MaskType mask) { \
GenInstrV(funct6, OP_FVV, vd, vs1, vs2, mask); \
}
#define DEFINE_OPIVX(name, funct6) \
void Assembler::name##_vx(VRegister vd, VRegister vs2, Register rs1, \
MaskType mask) { \
......@@ -2561,11 +2593,19 @@ void Assembler::vrgather_vx(VRegister vd, VRegister vs2, Register rs1,
}
#define DEFINE_OPMVV(name, funct6) \
void Assembler::name##_vs(VRegister vd, VRegister vs2, VRegister vs1, \
void Assembler::name##_vv(VRegister vd, VRegister vs2, VRegister vs1, \
MaskType mask) { \
GenInstrV(funct6, OP_MVV, vd, vs1, vs2, mask); \
}
// The OPMVX forms below use the GenInstrV overload that takes a scalar:
// GenInstrV(funct6, opcode, vd, rs1, vs2, mask).
#define DEFINE_OPMVX(name, funct6) \
void Assembler::name##_vx(VRegister vd, VRegister vs2, Register rs1, \
MaskType mask) { \
GenInstrV(funct6, OP_MVX, vd, rs1, vs2, mask); \
}
#define DEFINE_OPFVF(name, funct6) \
void Assembler::name##_vf(VRegister vd, VRegister vs2, FPURegister fs1, \
MaskType mask) { \
......@@ -2594,8 +2634,12 @@ void Assembler::vfmv_vf(VRegister vd, FPURegister fs1, MaskType mask) {
GenInstrV(VMV_FUNCT6, OP_FVF, vd, fs1, v0, mask);
}
void Assembler::vfmv_fs(FPURegister fd, VRegister vs2, MaskType mask) {
GenInstrV(VWFUNARY0_FUNCT6, OP_FVV, fd, v0, vs2, mask);
void Assembler::vfmv_fs(FPURegister fd, VRegister vs2) {
GenInstrV(VWFUNARY0_FUNCT6, OP_FVV, fd, v0, vs2, NoMask);
}
void Assembler::vfmv_sf(VRegister vd, FPURegister fs) {
GenInstrV(VRFUNARY0_FUNCT6, OP_FVF, vd, fs, v0, NoMask);
}
DEFINE_OPIVV(vadd, VADD_FUNCT6)
......@@ -2603,6 +2647,19 @@ DEFINE_OPIVX(vadd, VADD_FUNCT6)
DEFINE_OPIVI(vadd, VADD_FUNCT6)
DEFINE_OPIVV(vsub, VSUB_FUNCT6)
DEFINE_OPIVX(vsub, VSUB_FUNCT6)
DEFINE_OPMVX(vdiv, VDIV_FUNCT6)
DEFINE_OPMVX(vdivu, VDIVU_FUNCT6)
DEFINE_OPMVX(vmul, VMUL_FUNCT6)
DEFINE_OPMVX(vmulhu, VMULHU_FUNCT6)
DEFINE_OPMVX(vmulhsu, VMULHSU_FUNCT6)
DEFINE_OPMVX(vmulh, VMULH_FUNCT6)
DEFINE_OPMVV(vdiv, VDIV_FUNCT6)
DEFINE_OPMVV(vdivu, VDIVU_FUNCT6)
DEFINE_OPMVV(vmul, VMUL_FUNCT6)
DEFINE_OPMVV(vmulhu, VMULHU_FUNCT6)
DEFINE_OPMVV(vmulhsu, VMULHSU_FUNCT6)
DEFINE_OPMVV(vmulh, VMULH_FUNCT6)
DEFINE_OPMVV(vwaddu, VWADDU_FUNCT6)
DEFINE_OPIVX(vsadd, VSADD_FUNCT6)
DEFINE_OPIVV(vsadd, VSADD_FUNCT6)
DEFINE_OPIVI(vsadd, VSADD_FUNCT6)
......@@ -2670,14 +2727,16 @@ DEFINE_OPIVV(vsrl, VSRL_FUNCT6)
DEFINE_OPIVX(vsrl, VSRL_FUNCT6)
DEFINE_OPIVI(vsrl, VSRL_FUNCT6)
DEFINE_OPIVV(vsra, VSRA_FUNCT6)
DEFINE_OPIVX(vsra, VSRA_FUNCT6)
DEFINE_OPIVI(vsra, VSRA_FUNCT6)
DEFINE_OPIVV(vsll, VSLL_FUNCT6)
DEFINE_OPIVX(vsll, VSLL_FUNCT6)
DEFINE_OPIVI(vsll, VSLL_FUNCT6)
DEFINE_OPMVV(vredmaxu, VREDMAXU_FUNCT6)
DEFINE_OPMVV(vredmax, VREDMAX_FUNCT6)
DEFINE_OPMVV(vredmin, VREDMIN_FUNCT6)
DEFINE_OPMVV(vredminu, VREDMINU_FUNCT6)
DEFINE_OPIVV(vsmul, VSMUL_FUNCT6)
DEFINE_OPIVX(vsmul, VSMUL_FUNCT6)
DEFINE_OPFVV(vfadd, VFADD_FUNCT6)
DEFINE_OPFVF(vfadd, VFADD_FUNCT6)
......@@ -2694,6 +2753,8 @@ DEFINE_OPFVV(vmfle, VMFLE_FUNCT6)
DEFINE_OPFVV(vfmax, VFMAX_FUNCT6)
DEFINE_OPFVV(vfmin, VFMIN_FUNCT6)
DEFINE_OPFRED(vfredmax, VFREDMAX_FUNCT6)
DEFINE_OPFVV(vfsngj, VFSGNJ_FUNCT6)
DEFINE_OPFVF(vfsngj, VFSGNJ_FUNCT6)
DEFINE_OPFVV(vfsngjn, VFSGNJN_FUNCT6)
......
......@@ -739,6 +739,15 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vmerge_vx(VRegister vd, Register rs1, VRegister vs2);
void vmerge_vi(VRegister vd, uint8_t imm5, VRegister vs2);
void vredmaxu_vs(VRegister vd, VRegister vs2, VRegister vs1,
MaskType mask = NoMask);
void vredmax_vs(VRegister vd, VRegister vs2, VRegister vs1,
MaskType mask = NoMask);
void vredmin_vs(VRegister vd, VRegister vs2, VRegister vs1,
MaskType mask = NoMask);
void vredminu_vs(VRegister vd, VRegister vs2, VRegister vs1,
MaskType mask = NoMask);
void vadc_vv(VRegister vd, VRegister vs1, VRegister vs2);
void vadc_vx(VRegister vd, Register rs1, VRegister vs2);
void vadc_vi(VRegister vd, uint8_t imm5, VRegister vs2);
......@@ -748,7 +757,11 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vmadc_vi(VRegister vd, uint8_t imm5, VRegister vs2);
void vfmv_vf(VRegister vd, FPURegister fs1, MaskType mask = NoMask);
void vfmv_fs(FPURegister fd, VRegister vs2, MaskType mask = NoMask);
void vfmv_fs(FPURegister fd, VRegister vs2);
void vfmv_sf(VRegister vd, FPURegister fs);
void vwaddu_wx(VRegister vd, VRegister vs2, Register rs1,
MaskType mask = NoMask);
#define DEFINE_OPIVV(name, funct6) \
void name##_vv(VRegister vd, VRegister vs2, VRegister vs1, \
......@@ -763,7 +776,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
MaskType mask = NoMask);
#define DEFINE_OPMVV(name, funct6) \
void name##_vs(VRegister vd, VRegister vs2, VRegister vs1, \
void name##_vv(VRegister vd, VRegister vs2, VRegister vs1, \
MaskType mask = NoMask);
#define DEFINE_OPMVX(name, funct6) \
......@@ -774,6 +787,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void name##_vv(VRegister vd, VRegister vs2, VRegister vs1, \
MaskType mask = NoMask);
#define DEFINE_OPFRED(name, funct6) \
void name##_vs(VRegister vd, VRegister vs2, VRegister vs1, \
MaskType mask = NoMask);
#define DEFINE_OPFVF(name, funct6) \
void name##_vf(VRegister vd, VRegister vs2, FPURegister fs1, \
MaskType mask = NoMask);
......@@ -794,6 +811,19 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
DEFINE_OPIVI(vadd, VADD_FUNCT6)
DEFINE_OPIVV(vsub, VSUB_FUNCT6)
DEFINE_OPIVX(vsub, VSUB_FUNCT6)
DEFINE_OPMVX(vdiv, VDIV_FUNCT6)
DEFINE_OPMVX(vdivu, VDIVU_FUNCT6)
DEFINE_OPMVX(vmul, VMUL_FUNCT6)
DEFINE_OPMVX(vmulhu, VMULHU_FUNCT6)
DEFINE_OPMVX(vmulhsu, VMULHSU_FUNCT6)
DEFINE_OPMVX(vmulh, VMULH_FUNCT6)
DEFINE_OPMVV(vdiv, VDIV_FUNCT6)
DEFINE_OPMVV(vdivu, VDIVU_FUNCT6)
DEFINE_OPMVV(vmul, VMUL_FUNCT6)
DEFINE_OPMVV(vmulhu, VMULHU_FUNCT6)
DEFINE_OPMVV(vmulhsu, VMULHSU_FUNCT6)
DEFINE_OPMVV(vmulh, VMULH_FUNCT6)
DEFINE_OPMVV(vwaddu, VWADDU_FUNCT6)
DEFINE_OPIVX(vsadd, VSADD_FUNCT6)
DEFINE_OPIVV(vsadd, VSADD_FUNCT6)
DEFINE_OPIVI(vsadd, VSADD_FUNCT6)
......@@ -864,14 +894,16 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
DEFINE_OPIVX(vsrl, VSRL_FUNCT6)
DEFINE_OPIVI(vsrl, VSRL_FUNCT6)
DEFINE_OPIVV(vsra, VSRA_FUNCT6)
DEFINE_OPIVX(vsra, VSRA_FUNCT6)
DEFINE_OPIVI(vsra, VSRA_FUNCT6)
DEFINE_OPIVV(vsll, VSLL_FUNCT6)
DEFINE_OPIVX(vsll, VSLL_FUNCT6)
DEFINE_OPIVI(vsll, VSLL_FUNCT6)
DEFINE_OPMVV(vredmaxu, VREDMAXU_FUNCT6)
DEFINE_OPMVV(vredmax, VREDMAX_FUNCT6)
DEFINE_OPMVV(vredmin, VREDMIN_FUNCT6)
DEFINE_OPMVV(vredminu, VREDMINU_FUNCT6)
DEFINE_OPIVV(vsmul, VSMUL_FUNCT6)
DEFINE_OPIVX(vsmul, VSMUL_FUNCT6)
DEFINE_OPFVV(vfadd, VFADD_FUNCT6)
DEFINE_OPFVF(vfadd, VFADD_FUNCT6)
......@@ -888,6 +920,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
DEFINE_OPFVV(vmfle, VMFLE_FUNCT6)
DEFINE_OPFVV(vfmax, VMFMAX_FUNCT6)
DEFINE_OPFVV(vfmin, VMFMIN_FUNCT6)
DEFINE_OPFRED(vfredmax, VFREDMAX_FUNCT6)
DEFINE_OPFVV(vfsngj, VFSGNJ_FUNCT6)
DEFINE_OPFVF(vfsngj, VFSGNJ_FUNCT6)
......@@ -940,6 +973,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
#undef DEFINE_OPFVV_FMA
#undef DEFINE_OPFVF_FMA
#undef DEFINE_OPMVV_VIE
#undef DEFINE_OPFRED
#define DEFINE_VFUNARY(name, funct6, vs1) \
void name(VRegister vd, VRegister vs2, MaskType mask = NoMask) { \
......@@ -953,6 +987,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
DEFINE_VFUNARY(vfncvt_f_f_w, VFUNARY0_FUNCT6, VFNCVT_F_F_W)
DEFINE_VFUNARY(vfclass_v, VFUNARY1_FUNCT6, VFCLASS_V)
DEFINE_VFUNARY(vfsqrt_v, VFUNARY1_FUNCT6, VFSQRT_V)
#undef DEFINE_VFUNARY
void vnot_vv(VRegister dst, VRegister src, MaskType mask = NoMask) {
......
......@@ -712,6 +712,46 @@ enum Opcode : uint32_t {
RO_V_VSUB_VX = OP_IVX | (VSUB_FUNCT6 << kRvvFunct6Shift),
RO_V_VSUB_VV = OP_IVV | (VSUB_FUNCT6 << kRvvFunct6Shift),
VDIVU_FUNCT6 = 0b100000,
RO_V_VDIVU_VX = OP_MVX | (VDIVU_FUNCT6 << kRvvFunct6Shift),
RO_V_VDIVU_VV = OP_MVV | (VDIVU_FUNCT6 << kRvvFunct6Shift),
VDIV_FUNCT6 = 0b100001,
RO_V_VDIV_VX = OP_MVX | (VDIV_FUNCT6 << kRvvFunct6Shift),
RO_V_VDIV_VV = OP_MVV | (VDIV_FUNCT6 << kRvvFunct6Shift),
VREMU_FUNCT6 = 0b100010,
RO_V_VREMU_VX = OP_MVX | (VREMU_FUNCT6 << kRvvFunct6Shift),
RO_V_VREMU_VV = OP_MVV | (VREMU_FUNCT6 << kRvvFunct6Shift),
VREM_FUNCT6 = 0b100011,
RO_V_VREM_VX = OP_MVX | (VREM_FUNCT6 << kRvvFunct6Shift),
RO_V_VREM_VV = OP_MVV | (VREM_FUNCT6 << kRvvFunct6Shift),
VMULHU_FUNCT6 = 0b100100,
RO_V_VMULHU_VX = OP_MVX | (VMULHU_FUNCT6 << kRvvFunct6Shift),
RO_V_VMULHU_VV = OP_MVV | (VMULHU_FUNCT6 << kRvvFunct6Shift),
VMUL_FUNCT6 = 0b100101,
RO_V_VMUL_VX = OP_MVX | (VMUL_FUNCT6 << kRvvFunct6Shift),
RO_V_VMUL_VV = OP_MVV | (VMUL_FUNCT6 << kRvvFunct6Shift),
VMULHSU_FUNCT6 = 0b100110,
RO_V_VMULHSU_VX = OP_MVX | (VMULHSU_FUNCT6 << kRvvFunct6Shift),
RO_V_VMULHSU_VV = OP_MVV | (VMULHSU_FUNCT6 << kRvvFunct6Shift),
VMULH_FUNCT6 = 0b100111,
RO_V_VMULH_VX = OP_MVX | (VMULH_FUNCT6 << kRvvFunct6Shift),
RO_V_VMULH_VV = OP_MVV | (VMULH_FUNCT6 << kRvvFunct6Shift),
VWADDU_FUNCT6 = 0b110000,
RO_V_VWADDU_VV = OP_MVV | (VWADDU_FUNCT6 << kRvvFunct6Shift),
RO_V_VWADDU_VX = OP_MVX | (VWADDU_FUNCT6 << kRvvFunct6Shift),
VWADDUW_FUNCT6 = 0b110101,
RO_V_VWADDUW_VX = OP_MVX | (VWADDUW_FUNCT6 << kRvvFunct6Shift),
RO_V_VWADDUW_VV = OP_MVV | (VWADDUW_FUNCT6 << kRvvFunct6Shift),
VSADDU_FUNCT6 = 0b100000,
RO_V_VSADDU_VI = OP_IVI | (VSADDU_FUNCT6 << kRvvFunct6Shift),
RO_V_VSADDU_VV = OP_IVV | (VSADDU_FUNCT6 << kRvvFunct6Shift),
......@@ -829,11 +869,20 @@ enum Opcode : uint32_t {
RO_V_VSRL_VV = OP_IVV | (VSRL_FUNCT6 << kRvvFunct6Shift),
RO_V_VSRL_VX = OP_IVX | (VSRL_FUNCT6 << kRvvFunct6Shift),
VSRA_FUNCT6 = 0b101001,
RO_V_VSRA_VI = OP_IVI | (VSRA_FUNCT6 << kRvvFunct6Shift),
RO_V_VSRA_VV = OP_IVV | (VSRA_FUNCT6 << kRvvFunct6Shift),
RO_V_VSRA_VX = OP_IVX | (VSRA_FUNCT6 << kRvvFunct6Shift),
VSLL_FUNCT6 = 0b100101,
RO_V_VSLL_VI = OP_IVI | (VSLL_FUNCT6 << kRvvFunct6Shift),
RO_V_VSLL_VV = OP_IVV | (VSLL_FUNCT6 << kRvvFunct6Shift),
RO_V_VSLL_VX = OP_IVX | (VSLL_FUNCT6 << kRvvFunct6Shift),
VSMUL_FUNCT6 = 0b100111,
RO_V_VSMUL_VV = OP_IVV | (VSMUL_FUNCT6 << kRvvFunct6Shift),
RO_V_VSMUL_VX = OP_IVX | (VSMUL_FUNCT6 << kRvvFunct6Shift),
VADC_FUNCT6 = 0b010000,
RO_V_VADC_VI = OP_IVI | (VADC_FUNCT6 << kRvvFunct6Shift),
RO_V_VADC_VV = OP_IVV | (VADC_FUNCT6 << kRvvFunct6Shift),
......@@ -856,6 +905,9 @@ enum Opcode : uint32_t {
VWFUNARY0_FUNCT6 = 0b010000,
RO_V_VFMV_FS = OP_FVV | (VWFUNARY0_FUNCT6 << kRvvFunct6Shift),
VRFUNARY0_FUNCT6 = 0b010000,
RO_V_VFMV_SF = OP_FVF | (VRFUNARY0_FUNCT6 << kRvvFunct6Shift),
VREDMAXU_FUNCT6 = 0b000110,
RO_V_VREDMAXU = OP_MVV | (VREDMAXU_FUNCT6 << kRvvFunct6Shift),
VREDMAX_FUNCT6 = 0b000111,
......@@ -878,6 +930,9 @@ enum Opcode : uint32_t {
VFNCVT_F_F_W = 0b10100,
VFCLASS_V = 0b10000,
VFSQRT_V = 0b00000,
VFSQRT7_V = 0b00100,
VFREC7_V = 0b00101,
VFADD_FUNCT6 = 0b000000,
RO_V_VFADD_VV = OP_FVV | (VFADD_FUNCT6 << kRvvFunct6Shift),
......@@ -921,6 +976,9 @@ enum Opcode : uint32_t {
RO_V_VFMAX_VV = OP_FVV | (VFMAX_FUNCT6 << kRvvFunct6Shift),
RO_V_VFMAX_VF = OP_FVF | (VFMAX_FUNCT6 << kRvvFunct6Shift),
VFREDMAX_FUNCT6 = 0b0001111,
RO_V_VFREDMAX_VV = OP_FVV | (VFREDMAX_FUNCT6 << kRvvFunct6Shift),
VFMIN_FUNCT6 = 0b000100,
RO_V_VFMIN_VV = OP_FVV | (VFMIN_FUNCT6 << kRvvFunct6Shift),
RO_V_VFMIN_VF = OP_FVF | (VFMIN_FUNCT6 << kRvvFunct6Shift),
......@@ -1788,7 +1846,7 @@ class InstructionGetters : public T {
RVV_LMUL(CAST_VLMUL)
default:
return "unknown";
#undef CAST_VSEW
#undef CAST_VLMUL
}
}
......
......@@ -2159,11 +2159,25 @@ void TurboAssembler::RoundHelper(VRegister dst, VRegister src, Register scratch,
// they also satisfy (scratch2 - kFloatExponentBias >= kFloatMantissaBits),
// and JS round semantics specify that rounding of NaN (Infinity) returns NaN
// (Infinity), so NaN and Infinity are considered rounded value too.
li(scratch, 64 - kFloat32MantissaBits - kFloat32ExponentBits);
const int kFloatMantissaBits =
sizeof(F) == 4 ? kFloat32MantissaBits : kFloat64MantissaBits;
const int kFloatExponentBits =
sizeof(F) == 4 ? kFloat32ExponentBits : kFloat64ExponentBits;
const int kFloatExponentBias =
sizeof(F) == 4 ? kFloat32ExponentBias : kFloat64ExponentBias;
// Extract the biased exponent of each lane: shift the sign bit off the top,
// then logical-shift right so only the exponent field remains.
li(scratch, 64 - kFloatMantissaBits - kFloatExponentBits);
vsll_vx(v_scratch, src, scratch);
li(scratch, 64 - kFloat32ExponentBits);
li(scratch, 64 - kFloatExponentBits);
vsrl_vx(v_scratch, v_scratch, scratch);
li(scratch, kFloat32ExponentBias + kFloat32MantissaBits);
li(scratch, kFloatExponentBias + kFloatMantissaBits);
vmslt_vx(v0, v_scratch, scratch);
VU.set(frm);
......
......@@ -374,8 +374,9 @@ constexpr Register kWasmInstanceRegister = a0;
constexpr Register kWasmCompileLazyFuncIndexRegister = t0;
constexpr DoubleRegister kFPReturnRegister0 = fa0;
constexpr VRegister kSimd128ScratchReg = v27;
constexpr VRegister kSimd128ScratchReg2 = v26;
constexpr VRegister kSimd128ScratchReg = v26;
constexpr VRegister kSimd128ScratchReg2 = v27;
constexpr VRegister kSimd128ScratchReg3 = v8;
constexpr VRegister kSimd128RegZero = v25;
#ifdef V8_COMPRESS_POINTERS_IN_SHARED_CAGE
......
......@@ -1953,6 +1953,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vl(i.OutputSimd128Register(), src, 0, VSew::E8);
break;
}
case kRiscvS128Zero: {
Simd128Register dst = i.OutputSimd128Register();
__ VU.set(kScratchReg, E8, m1);
__ vmv_vx(dst, zero_reg);
break;
}
case kRiscvS128AllOnes: {
__ VU.set(kScratchReg, E8, m1);
__ vmv_vx(i.OutputSimd128Register(), zero_reg);
__ vnot_vv(i.OutputSimd128Register(), i.OutputSimd128Register());
break;
}
case kRiscvS128Select: {
__ VU.set(kScratchReg, E8, m1);
__ vand_vv(kSimd128ScratchReg, i.InputSimd128Register(1),
......@@ -2004,12 +2016,57 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ WasmRvvS128const(dst, imm);
break;
}
case kRiscvI64x2Mul: {
(__ VU).set(kScratchReg, VSew::E64, Vlmul::m1);
__ vmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kRiscvI64x2Add: {
(__ VU).set(kScratchReg, VSew::E64, Vlmul::m1);
__ vadd_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kRiscvI8x16RoundingAverageU: {
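// Rounding average: widen a + b to E16, add 1, divide by 2, then narrow back
// to E8 with an unsigned clip (vnclipu).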
__ VU.set(kScratchReg2, E8, m1);
__ vwaddu_vv(kSimd128ScratchReg, i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ li(kScratchReg, 1);
__ vwaddu_wx(kSimd128ScratchReg3, kSimd128ScratchReg, kScratchReg);
__ li(kScratchReg, 2);
__ VU.set(kScratchReg2, E16, m2);
__ vdivu_vx(kSimd128ScratchReg3, kSimd128ScratchReg3, kScratchReg);
__ VU.set(kScratchReg2, E8, m1);
__ vnclipu_vi(i.OutputSimd128Register(), kSimd128ScratchReg3, 0);
break;
}
case kRiscvI16x8RoundingAverageU: {
__ VU.set(kScratchReg2, E16, m1);
__ vwaddu_vv(kSimd128ScratchReg, i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ li(kScratchReg, 1);
__ vwaddu_wx(kSimd128ScratchReg3, kSimd128ScratchReg, kScratchReg);
__ li(kScratchReg, 2);
__ VU.set(kScratchReg2, E32, m2);
__ vdivu_vx(kSimd128ScratchReg3, kSimd128ScratchReg3, kScratchReg);
__ VU.set(kScratchReg2, E16, m1);
__ vnclipu_vi(i.OutputSimd128Register(), kSimd128ScratchReg3, 0);
break;
}
case kRiscvI16x8Mul: {
(__ VU).set(kScratchReg, VSew::E16, Vlmul::m1);
__ vmv_vx(kSimd128ScratchReg, zero_reg);
__ vmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kRiscvI16x8Q15MulRSatS: {
__ VU.set(kScratchReg, E16, m1);
__ vsmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kRiscvI16x8AddSatS: {
(__ VU).set(kScratchReg, VSew::E16, Vlmul::m1);
__ vsadd_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
......@@ -2064,6 +2121,146 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kRiscvI8x16ExtractLaneU: {
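// Slide the requested lane down to element 0, move it to the result GPR,
// then zero-extend the low 8 bits with a shift pair.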
__ VU.set(kScratchReg, E8, m1);
__ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
i.InputInt8(1));
__ vmv_xs(i.OutputRegister(), kSimd128ScratchReg);
__ slli(i.OutputRegister(), i.OutputRegister(), 64 - 8);
__ srli(i.OutputRegister(), i.OutputRegister(), 64 - 8);
break;
}
case kRiscvI8x16ExtractLaneS: {
__ VU.set(kScratchReg, E8, m1);
__ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
i.InputInt8(1));
__ vmv_xs(i.OutputRegister(), kSimd128ScratchReg);
break;
}
case kRiscvI16x8ExtractLaneU: {
__ VU.set(kScratchReg, E16, m1);
__ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
i.InputInt8(1));
__ vmv_xs(i.OutputRegister(), kSimd128ScratchReg);
__ slli(i.OutputRegister(), i.OutputRegister(), 64 - 16);
__ srli(i.OutputRegister(), i.OutputRegister(), 64 - 16);
break;
}
case kRiscvI16x8ExtractLaneS: {
__ VU.set(kScratchReg, E16, m1);
__ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
i.InputInt8(1));
__ vmv_xs(i.OutputRegister(), kSimd128ScratchReg);
break;
}
case kRiscvI8x16ShrU: {
__ VU.set(kScratchReg, E8, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 8 - 1);
__ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
__ vsrl_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt5(1) % 8);
}
break;
}
case kRiscvI16x8ShrU: {
__ VU.set(kScratchReg, E16, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 16 - 1);
__ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
__ vsrl_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt5(1) % 16);
}
break;
}
case kRiscvI32x4ShrU: {
__ VU.set(kScratchReg, E32, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 32 - 1);
__ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
__ vsrl_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt5(1) % 32);
}
break;
}
case kRiscvI64x2ShrU: {
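// Shift amounts are taken modulo 64; counts that do not fit vsrl.vi's
// 5-bit immediate go through kScratchReg instead.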
__ VU.set(kScratchReg, E64, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 64 - 1);
__ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
if (is_uint5(i.InputInt6(1) % 64)) {
__ vsrl_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt6(1) % 64);
} else {
__ li(kScratchReg, i.InputInt6(1) % 64);
__ vsrl_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
kScratchReg);
}
}
break;
}
case kRiscvI8x16ShrS: {
__ VU.set(kScratchReg, E8, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 8 - 1);
__ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
__ vsra_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt5(1) % 8);
}
break;
}
case kRiscvI16x8ShrS: {
__ VU.set(kScratchReg, E16, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 16 - 1);
__ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
__ vsra_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt5(1) % 16);
}
break;
}
case kRiscvI32x4ShrS: {
__ VU.set(kScratchReg, E32, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 32 - 1);
__ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
__ vsra_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt5(1) % 32);
}
break;
}
case kRiscvI64x2ShrS: {
__ VU.set(kScratchReg, E64, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 64 - 1);
__ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
if (is_uint5(i.InputInt6(1) % 64)) {
__ vsra_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt6(1) % 64);
} else {
__ li(kScratchReg, i.InputInt6(1) % 64);
__ vsra_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
kScratchReg);
}
}
break;
}
case kRiscvI32x4ExtractLane: {
__ WasmRvvExtractLane(i.OutputRegister(), i.InputSimd128Register(0),
i.InputInt8(1), E32, m1);
......@@ -2071,11 +2268,38 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kRiscvI32x4Abs: {
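// Abs: copy src to dst, build a mask of negative lanes in v0 with vmslt,
// then negate only those lanes under the mask.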
__ VU.set(kScratchReg, E32, m1);
__ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ vmv_vx(kSimd128RegZero, zero_reg);
__ vmslt_vv(v0, i.InputSimd128Register(0), kSimd128RegZero);
__ vneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
MaskType::Mask);
break;
}
case kRiscvI16x8Abs: {
__ VU.set(kScratchReg, E16, m1);
__ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ vmv_vx(kSimd128RegZero, zero_reg);
__ vmslt_vv(v0, i.InputSimd128Register(0), kSimd128RegZero);
__ vneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
MaskType::Mask);
break;
}
case kRiscvI8x16Abs: {
__ VU.set(kScratchReg, E8, m1);
__ vmv_vx(kSimd128RegZero, zero_reg);
__ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ vmslt_vv(v0, i.InputSimd128Register(0), kSimd128RegZero);
__ vsub_vv(i.OutputSimd128Register(), kSimd128RegZero,
i.InputSimd128Register(0), Mask);
__ vneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
MaskType::Mask);
break;
}
case kRiscvI64x2Abs: {
__ VU.set(kScratchReg, E64, m1);
__ vmv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ vmv_vx(kSimd128RegZero, zero_reg);
__ vmslt_vv(v0, i.InputSimd128Register(0), kSimd128RegZero);
__ vneg_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
MaskType::Mask);
break;
}
case kRiscvI8x16Eq: {
......@@ -2191,47 +2415,51 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kRiscvI8x16Shl: {
__ VU.set(kScratchReg, E8, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 8 - 1);
__ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
__ vsll_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt3(1));
i.InputInt5(1) % 8);
}
break;
}
case kRiscvI16x8Shl: {
__ VU.set(kScratchReg, E16, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 16 - 1);
__ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
__ vsll_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt4(1));
i.InputInt5(1) % 16);
}
break;
}
case kRiscvI32x4Shl: {
__ VU.set(kScratchReg, E32, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 32 - 1);
__ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
__ vsll_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt5(1));
i.InputInt5(1) % 32);
}
break;
}
case kRiscvI64x2Shl: {
__ VU.set(kScratchReg, E64, m1);
if (instr->InputAt(1)->IsRegister()) {
__ andi(i.InputRegister(1), i.InputRegister(1), 64 - 1);
__ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(1));
} else {
if (is_int5(i.InputInt6(1))) {
if (is_int5(i.InputInt6(1) % 64)) {
__ vsll_vi(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputInt6(1));
i.InputInt6(1) % 64);
} else {
__ li(kScratchReg, i.InputInt6(1));
__ li(kScratchReg, i.InputInt6(1) % 64);
__ vsll_vx(i.OutputSimd128Register(), i.InputSimd128Register(0),
kScratchReg);
}
......@@ -2241,9 +2469,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kRiscvI8x16ReplaceLane: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
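// Build a one-hot lane mask in v0 with vmv.sx, then vmerge the scalar into
// just that lane.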
__ VU.set(kScratchReg, E32, m1);
__ VU.set(kScratchReg, E64, m1);
__ li(kScratchReg, 0x1 << i.InputInt8(1));
__ vmv_sx(v0, kScratchReg);
__ VU.set(kScratchReg, E8, m1);
__ vmerge_vx(dst, i.InputRegister(2), src);
break;
}
......@@ -2412,6 +2641,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vor_vv(dst, dst, kSimd128ScratchReg);
break;
}
case kRiscvF64x2NearestInt: {
__ Round_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
kScratchReg, kSimd128ScratchReg);
break;
}
case kRiscvF64x2Trunc: {
__ Trunc_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
kScratchReg, kSimd128ScratchReg);
break;
}
case kRiscvF64x2Sqrt: {
__ VU.set(kScratchReg, E64, m1);
__ vfsqrt_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kRiscvF64x2Splat: {
(__ VU).set(kScratchReg, E64, m1);
__ fmv_x_d(kScratchReg, i.InputDoubleRegister(0));
......@@ -2464,20 +2708,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
break;
}
case kRiscvF64x2ReplaceLane: {
__ VU.set(kScratchReg, E64, m1);
__ li(kScratchReg, 0x1 << i.InputInt8(1));
__ vmv_sx(v0, kScratchReg);
__ fmv_x_d(kScratchReg, i.InputDoubleRegister(2));
__ vmerge_vx(i.OutputSimd128Register(), kScratchReg,
i.InputSimd128Register(0));
break;
}
case kRiscvF64x2Lt: {
__ VU.set(kScratchReg, E64, m1);
__ vmflt_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
__ vmflt_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
__ vmv_vx(i.OutputSimd128Register(), zero_reg);
__ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
break;
}
case kRiscvF64x2Le: {
__ VU.set(kScratchReg, E64, m1);
__ vmfle_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
__ vmfle_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
__ vmv_vx(i.OutputSimd128Register(), zero_reg);
__ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
break;
}
case kRiscvF64x2Pmax: {
__ VU.set(kScratchReg, E64, m1);
__ vmflt_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
__ vmerge_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(0));
break;
}
case kRiscvF64x2Pmin: {
__ VU.set(kScratchReg, E64, m1);
__ vmflt_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
__ vmerge_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(0));
break;
}
case kRiscvF64x2Min: {
__ VU.set(kScratchReg, E64, m1);
const int64_t kNaN = 0x7ff8000000000000L;
......@@ -2506,6 +2773,49 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmv_vv(i.OutputSimd128Register(), kSimd128ScratchReg);
break;
}
case kRiscvF64x2Div: {
__ VU.set(kScratchReg, E64, m1);
__ vfdiv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kRiscvF64x2Mul: {
__ VU.set(kScratchReg, E64, m1);
__ VU.set(RoundingMode::RTZ);
__ vfmul_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kRiscvF64x2ExtractLane: {
__ VU.set(kScratchReg, E64, m1);
if (is_uint5(i.InputInt8(1))) {
__ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
i.InputInt8(1));
} else {
__ li(kScratchReg, i.InputInt8(1));
__ vslidedown_vx(kSimd128ScratchReg, i.InputSimd128Register(0),
kScratchReg);
}
__ vfmv_fs(i.OutputDoubleRegister(), kSimd128ScratchReg);
break;
}
case kRiscvF32x4ExtractLane: {
__ VU.set(kScratchReg, E32, m1);
__ vslidedown_vi(kSimd128ScratchReg, i.InputSimd128Register(0),
i.InputInt8(1));
__ vfmv_fs(i.OutputDoubleRegister(), kSimd128ScratchReg);
break;
}
case kRiscvF32x4Trunc: {
__ Trunc_f(i.OutputSimd128Register(), i.InputSimd128Register(0),
kScratchReg, kSimd128ScratchReg);
break;
}
case kRiscvF32x4NearestInt: {
__ Round_f(i.OutputSimd128Register(), i.InputSimd128Register(0),
kScratchReg, kSimd128ScratchReg);
break;
}
case kRiscvF32x4DemoteF64x2Zero: {
__ VU.set(kScratchReg, E32, m1);
__ vfncvt_f_f_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
......@@ -2567,8 +2877,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kRiscvF32x4Div: {
__ VU.set(kScratchReg, E32, m1);
__ VU.set(RoundingMode::RTZ);
__ vfdiv_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(0));
__ vfdiv_vv(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kRiscvF32x4Mul: {
......@@ -2592,20 +2902,48 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
break;
}
case kRiscvF32x4ReplaceLane: {
__ VU.set(kScratchReg, E32, m1);
__ li(kScratchReg, 0x1 << i.InputInt8(1));
__ vmv_sx(v0, kScratchReg);
__ fmv_x_w(kScratchReg, i.InputSingleRegister(2));
__ vmerge_vx(i.OutputSimd128Register(), kScratchReg,
i.InputSimd128Register(0));
break;
}
case kRiscvF32x4Lt: {
__ VU.set(kScratchReg, E32, m1);
__ vmflt_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
__ vmflt_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
__ vmv_vx(i.OutputSimd128Register(), zero_reg);
__ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
break;
}
case kRiscvF32x4Le: {
__ VU.set(kScratchReg, E32, m1);
__ vmfle_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
__ vmfle_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
__ vmv_vx(i.OutputSimd128Register(), zero_reg);
__ vmerge_vi(i.OutputSimd128Register(), -1, i.OutputSimd128Register());
break;
}
case kRiscvF32x4Pmax: {
__ VU.set(kScratchReg, E32, m1);
__ vmflt_vv(v0, i.InputSimd128Register(0), i.InputSimd128Register(1));
__ vmerge_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(0));
break;
}
case kRiscvF32x4Pmin: {
__ VU.set(kScratchReg, E32, m1);
__ vmflt_vv(v0, i.InputSimd128Register(1), i.InputSimd128Register(0));
__ vmerge_vv(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(0));
break;
}
case kRiscvF32x4Sqrt: {
__ VU.set(kScratchReg, E32, m1);
__ vfsqrt_v(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kRiscvF32x4Max: {
__ VU.set(kScratchReg, E32, m1);
const int32_t kNaN = 0x7FC00000;
......
......@@ -2706,10 +2706,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I64x2Neg, kRiscvI64x2Neg) \
V(I64x2Abs, kRiscvI64x2Abs) \
V(I64x2BitMask, kRiscvI64x2BitMask) \
V(I64x2Eq, kRiscvI64x2Eq) \
V(I64x2Ne, kRiscvI64x2Ne) \
V(I64x2GtS, kRiscvI64x2GtS) \
V(I64x2GeS, kRiscvI64x2GeS) \
V(F32x4SConvertI32x4, kRiscvF32x4SConvertI32x4) \
V(F32x4UConvertI32x4, kRiscvF32x4UConvertI32x4) \
V(F32x4Abs, kRiscvF32x4Abs) \
......@@ -2780,6 +2776,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F64x2Ne, kRiscvF64x2Ne) \
V(F64x2Lt, kRiscvF64x2Lt) \
V(F64x2Le, kRiscvF64x2Le) \
V(I64x2Eq, kRiscvI64x2Eq) \
V(I64x2Ne, kRiscvI64x2Ne) \
V(I64x2GtS, kRiscvI64x2GtS) \
V(I64x2GeS, kRiscvI64x2GeS) \
V(I64x2Add, kRiscvI64x2Add) \
V(I64x2Sub, kRiscvI64x2Sub) \
V(I64x2Mul, kRiscvI64x2Mul) \
......
......@@ -1923,6 +1923,9 @@ void Decoder::DecodeRvvIVV(Instruction* instr) {
case RO_V_VSSUB_VV:
Format(instr, "vssub.vv 'vd, 'vs2, 'vs1'vm");
break;
case RO_V_VSSUBU_VV:
Format(instr, "vssubu.vv 'vd, 'vs2, 'vs1'vm");
break;
case RO_V_VMIN_VV:
Format(instr, "vmin.vv 'vd, 'vs2, 'vs1'vm");
break;
......@@ -1992,6 +1995,18 @@ void Decoder::DecodeRvvIVV(Instruction* instr) {
case RO_V_VNCLIPU_WV:
Format(instr, "vnclipu.wv 'vd, 'vs2, 'vs1");
break;
case RO_V_VSLL_VV:
Format(instr, "vsll.vv 'vd, 'vs2, 'vs1");
break;
case RO_V_VSRL_VV:
Format(instr, "vsrl.vv 'vd, 'vs2, 'vs1");
break;
case RO_V_VSRA_VV:
Format(instr, "vsra.vv 'vd, 'vs2, 'vs1");
break;
case RO_V_VSMUL_VV:
Format(instr, "vsmul.vv 'vd, 'vs2, 'vs1");
break;
default:
UNSUPPORTED_RISCV();
break;
......@@ -2056,6 +2071,9 @@ void Decoder::DecodeRvvIVI(Instruction* instr) {
case RO_V_VSRL_VI:
Format(instr, "vsrl.vi 'vd, 'vs2, 'uimm5'vm");
break;
case RO_V_VSRA_VI:
Format(instr, "vsra.vi 'vd, 'vs2, 'uimm5'vm");
break;
case RO_V_VSLL_VI:
Format(instr, "vsll.vi 'vd, 'vs2, 'uimm5'vm");
break;
......@@ -2184,12 +2202,18 @@ void Decoder::DecodeRvvIVX(Instruction* instr) {
case RO_V_VSRL_VX:
Format(instr, "vsrl.vx 'vd, 'vs2, 'rs1");
break;
case RO_V_VSRA_VX:
Format(instr, "vsra.vx 'vd, 'vs2, 'rs1");
break;
case RO_V_VNCLIP_WX:
Format(instr, "vnclip.wx 'vd, 'vs2, 'rs1");
break;
case RO_V_VNCLIPU_WX:
Format(instr, "vnclipu.wx 'vd, 'vs2, 'rs1");
break;
case RO_V_VSMUL_VX:
Format(instr, "vsmul.vx 'vd, 'vs2, 'vs1");
break;
default:
UNSUPPORTED_RISCV();
break;
......@@ -2235,6 +2259,21 @@ void Decoder::DecodeRvvMVV(Instruction* instr) {
UNSUPPORTED_RISCV();
}
break;
case RO_V_VMUL_VV:
Format(instr, "vmul.vv 'vd, 'vs2, 'vs1'vm");
break;
case RO_V_VMULHU_VV:
Format(instr, "vmulhu.vv 'vd, 'vs2, 'vs1'vm");
break;
case RO_V_VDIV_VV:
Format(instr, "vdiv.vv 'vd, 'vs2, 'vs1'vm");
break;
case RO_V_VDIVU_VV:
Format(instr, "vdivu.vv 'vd, 'vs2, 'vs1'vm");
break;
case RO_V_VWADDU_VV:
Format(instr, "vwaddu.vv 'vd, 'vs2, 'vs1'vm");
break;
default:
UNSUPPORTED_RISCV();
break;
......@@ -2251,6 +2290,21 @@ void Decoder::DecodeRvvMVX(Instruction* instr) {
UNSUPPORTED_RISCV();
}
break;
case RO_V_VMUL_VX:
Format(instr, "vmul.vx 'vd, 'vs2, 'rs1'vm");
break;
case RO_V_VMULHU_VX:
Format(instr, "vmulhu.vx 'vd, 'vs2, 'rs1'vm");
break;
case RO_V_VDIV_VX:
Format(instr, "vdiv.vx 'vd, 'vs2, 'rs1'vm");
break;
case RO_V_VDIVU_VX:
Format(instr, "vdivu.vx 'vd, 'vs2, 'rs1'vm");
break;
case RO_V_VWADDUW_VX:
Format(instr, "vwaddu.wx 'vd, 'vs2, 'rs1'vm");
break;
default:
UNSUPPORTED_RISCV();
break;
......@@ -2287,6 +2341,9 @@ void Decoder::DecodeRvvFVV(Instruction* instr) {
case VFCLASS_V:
Format(instr, "vfclass.v 'vd, 'vs2'vm");
break;
case VFSQRT_V:
Format(instr, "vfsqrt.v 'vd, 'vs2'vm");
break;
default:
break;
}
......@@ -2306,6 +2363,9 @@ void Decoder::DecodeRvvFVV(Instruction* instr) {
case RO_V_VFMAX_VV:
Format(instr, "vfmax.vv 'vd, 'vs2, 'vs1'vm");
break;
case RO_V_VFREDMAX_VV:
Format(instr, "vfredmax.vs 'vd, 'vs2, 'vs1'vm");
break;
case RO_V_VFMIN_VV:
Format(instr, "vfmin.vv 'vd, 'vs2, 'vs1'vm");
break;
......
......@@ -87,6 +87,42 @@
// PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
// HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
// MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
static inline bool is_aligned(const unsigned val, const unsigned pos) {
return pos ? (val & (pos - 1)) == 0 : true;
}
static inline bool is_overlapped(const int astart, int asize, const int bstart,
int bsize) {
asize = asize == 0 ? 1 : asize;
bsize = bsize == 0 ? 1 : bsize;
const int aend = astart + asize;
const int bend = bstart + bsize;
return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize;
}
static inline bool is_overlapped_widen(const int astart, int asize,
const int bstart, int bsize) {
asize = asize == 0 ? 1 : asize;
bsize = bsize == 0 ? 1 : bsize;
const int aend = astart + asize;
const int bend = bstart + bsize;
if (astart < bstart && is_overlapped(astart, asize, bstart, bsize) &&
!is_overlapped(astart, asize, bstart + bsize, bsize)) {
return false;
} else {
return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize;
}
}
#define require_align(val, pos) CHECK_EQ(is_aligned(val, pos), true)
#define require_noover(astart, asize, bstart, bsize) \
CHECK_EQ(!is_overlapped(astart, asize, bstart, bsize), true)
#define require_noover_widen(astart, asize, bstart, bsize) \
CHECK_EQ(!is_overlapped_widen(astart, asize, bstart, bsize), true)
#define RVV_VI_GENERAL_LOOP_BASE \
for (uint64_t i = rvv_vstart(); i < rvv_vl(); i++) {
#define RVV_VI_LOOP_END \
......@@ -250,6 +286,118 @@
RVV_VI_LOOP_END \
rvv_trace_vd();
// widen operation loop
#define VI_WIDE_CHECK_COMMON \
CHECK_LE(rvv_vflmul(), 4); \
CHECK_LE(rvv_vsew() * 2, kRvvELEN); \
require_align(rvv_vd_reg(), rvv_vflmul() * 2); \
require_vm;
#define RVV_VI_CHECK_DDS(is_rs) \
VI_WIDE_CHECK_COMMON; \
require_align(rvv_vs2_reg(), rvv_vflmul() * 2); \
if (is_rs) { \
require_align(rvv_vs1_reg(), rvv_vflmul()); \
if (rvv_vflmul() < 1) { \
require_noover(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs1_reg(), \
rvv_vflmul()); \
} else { \
require_noover_widen(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs1_reg(), \
rvv_vflmul()); \
} \
}
#define RVV_VI_CHECK_DSS(is_vs1) \
VI_WIDE_CHECK_COMMON; \
require_align(rvv_vs2_reg(), rvv_vflmul()); \
if (rvv_vflmul() < 1) { \
require_noover(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs2_reg(), \
rvv_vflmul()); \
} else { \
require_noover_widen(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs2_reg(), \
rvv_vflmul()); \
} \
if (is_vs1) { \
require_align(rvv_vs1_reg(), rvv_vflmul()); \
if (rvv_vflmul() < 1) { \
require_noover(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs1_reg(), \
rvv_vflmul()); \
} else { \
require_noover_widen(rvv_vd_reg(), rvv_vflmul() * 2, rvv_vs1_reg(), \
rvv_vflmul()); \
} \
}
#define RVV_VI_VV_LOOP_WIDEN(BODY) \
RVV_VI_GENERAL_LOOP_BASE \
RVV_VI_LOOP_MASK_SKIP() \
if (rvv_vsew() == E8) { \
VV_PARAMS(8); \
BODY; \
} else if (rvv_vsew() == E16) { \
VV_PARAMS(16); \
BODY; \
} else if (rvv_vsew() == E32) { \
VV_PARAMS(32); \
BODY; \
} \
RVV_VI_LOOP_END \
rvv_trace_vd();
#define RVV_VI_VX_LOOP_WIDEN(BODY) \
RVV_VI_GENERAL_LOOP_BASE \
if (rvv_vsew() == E8) { \
VX_PARAMS(8); \
BODY; \
} else if (rvv_vsew() == E16) { \
VX_PARAMS(16); \
BODY; \
} else if (rvv_vsew() == E32) { \
VX_PARAMS(32); \
BODY; \
} \
RVV_VI_LOOP_END \
rvv_trace_vd();
#define VI_WIDE_OP_AND_ASSIGN(var0, var1, var2, op0, op1, sign) \
switch (rvv_vsew()) { \
case E8: { \
Rvvelt<uint16_t>(rvv_vd_reg(), i, true) = \
op1((sign##16_t)(sign##8_t)var0 op0(sign##16_t)(sign##8_t) var1) + \
var2; \
} break; \
case E16: { \
Rvvelt<uint32_t>(rvv_vd_reg(), i, true) = \
op1((sign##32_t)(sign##16_t)var0 op0(sign##32_t)(sign##16_t) var1) + \
var2; \
} break; \
default: { \
Rvvelt<uint64_t>(rvv_vd_reg(), i, true) = \
op1((sign##64_t)(sign##32_t)var0 op0(sign##64_t)(sign##32_t) var1) + \
var2; \
} break; \
}
#define VI_WIDE_WVX_OP(var0, op0, sign) \
switch (rvv_vsew()) { \
case E8: { \
sign##16_t & vd_w = Rvvelt<sign##16_t>(rvv_vd_reg(), i, true); \
sign##16_t vs2_w = Rvvelt<sign##16_t>(rvv_vs2_reg(), i); \
vd_w = vs2_w op0(sign##16_t)(sign##8_t) var0; \
} break; \
case E16: { \
sign##32_t & vd_w = Rvvelt<sign##32_t>(rvv_vd_reg(), i, true); \
sign##32_t vs2_w = Rvvelt<sign##32_t>(rvv_vs2_reg(), i); \
vd_w = vs2_w op0(sign##32_t)(sign##16_t) var0; \
} break; \
default: { \
sign##64_t & vd_w = Rvvelt<sign##64_t>(rvv_vd_reg(), i, true); \
sign##64_t vs2_w = Rvvelt<sign##64_t>(rvv_vs2_reg(), i); \
vd_w = vs2_w op0(sign##64_t)(sign##32_t) var0; \
} break; \
}
#define RVV_VI_VVXI_MERGE_LOOP(BODY) \
RVV_VI_GENERAL_LOOP_BASE \
if (rvv_vsew() == E8) { \
......@@ -346,6 +494,9 @@
type_usew_t<x>::type uimm5 = (type_usew_t<x>::type)instr_.RvvUimm5(); \
type_usew_t<x>::type vs2 = Rvvelt<type_usew_t<x>::type>(rvv_vs2_reg(), i);
#define float32_t float
#define float64_t double
#define RVV_VI_LOOP_CMP_BASE \
CHECK(rvv_vsew() >= E8 && rvv_vsew() <= E64); \
for (reg_t i = rvv_vstart(); i < rvv_vl(); ++i) { \
......@@ -663,6 +814,37 @@
} \
rvv_trace_vd();
#define VI_VFP_LOOP_REDUCTION_BASE(width) \
float##width##_t vd_0 = Rvvelt<float##width##_t>(rvv_vd_reg(), 0); \
float##width##_t vs1_0 = Rvvelt<float##width##_t>(rvv_vs1_reg(), 0); \
vd_0 = vs1_0; \
/*bool is_active = false;*/ \
for (reg_t i = rvv_vstart(); i < rvv_vl(); ++i) { \
RVV_VI_LOOP_MASK_SKIP(); \
float##width##_t vs2 = Rvvelt<float##width##_t>(rvv_vs2_reg(), i); \
/*is_active = true;*/
#define VI_VFP_LOOP_REDUCTION_END(x) \
} \
set_rvv_vstart(0); \
if (rvv_vl() > 0) { \
Rvvelt<type_sew_t<x>::type>(rvv_vd_reg(), 0, true) = vd_0; \
}
#define RVV_VI_VFP_VV_LOOP_REDUCTION(BODY16, BODY32, BODY64) \
if (rvv_vsew() == E16) { \
UNIMPLEMENTED(); \
} else if (rvv_vsew() == E32) { \
VI_VFP_LOOP_REDUCTION_BASE(32) \
BODY32; \
VI_VFP_LOOP_REDUCTION_END(32) \
} else if (rvv_vsew() == E64) { \
VI_VFP_LOOP_REDUCTION_BASE(64) \
BODY64; \
VI_VFP_LOOP_REDUCTION_END(64) \
} \
rvv_trace_vd();
// reduction loop - unsigned
#define RVV_VI_ULOOP_REDUCTION_BASE(x) \
auto& vd_0_des = Rvvelt<type_usew_t<x>::type>(rvv_vd_reg(), 0, true); \
......@@ -738,7 +920,7 @@
set_rvv_vstart(0); \
if (::v8::internal::FLAG_trace_sim) { \
__int128_t value = Vregister_[rvv_vd_reg()]; \
SNPrintF(trace_buf_, "0x%016" PRIx64 "%016" PRIx64 " <-- 0x%016" PRIx64, \
SNPrintF(trace_buf_, "%016" PRIx64 "%016" PRIx64 " <-- 0x%016" PRIx64, \
*(reinterpret_cast<int64_t*>(&value) + 1), \
*reinterpret_cast<int64_t*>(&value), \
(uint64_t)(get_register(rs1_reg()))); \
......@@ -762,7 +944,7 @@
set_rvv_vstart(0); \
if (::v8::internal::FLAG_trace_sim) { \
__int128_t value = Vregister_[rvv_vd_reg()]; \
SNPrintF(trace_buf_, "0x%016" PRIx64 "%016" PRIx64 " --> 0x%016" PRIx64, \
SNPrintF(trace_buf_, "%016" PRIx64 "%016" PRIx64 " --> 0x%016" PRIx64, \
*(reinterpret_cast<int64_t*>(&value) + 1), \
*reinterpret_cast<int64_t*>(&value), \
(uint64_t)(get_register(rs1_reg()))); \
......@@ -896,7 +1078,24 @@ inline Dst unsigned_saturation(Src v, uint n) {
RVV_VI_LOOP_END \
rvv_trace_vd();
#define CHECK_EXT(div) \
CHECK_NE(rvv_vd_reg(), rvv_vs2_reg()); \
reg_t from = rvv_vsew() / div; \
CHECK(from >= E8 && from <= E64); \
CHECK_GE((float)rvv_vflmul() / div, 0.125); \
CHECK_LE((float)rvv_vflmul() / div, 8); \
require_align(rvv_vd_reg(), rvv_vflmul()); \
require_align(rvv_vs2_reg(), rvv_vflmul() / div); \
if ((rvv_vflmul() / div) < 1) { \
require_noover(rvv_vd_reg(), rvv_vflmul(), rvv_vs2_reg(), \
rvv_vflmul() / div); \
} else { \
require_noover_widen(rvv_vd_reg(), rvv_vflmul(), rvv_vs2_reg(), \
rvv_vflmul() / div); \
}
#define RVV_VI_VIE_8_LOOP(signed) \
CHECK_EXT(8) \
RVV_VI_GENERAL_LOOP_BASE \
RVV_VI_LOOP_MASK_SKIP() \
if (rvv_vsew() == E64) { \
......@@ -914,6 +1113,7 @@ inline Dst unsigned_saturation(Src v, uint n) {
rvv_trace_vd();
#define RVV_VI_VIE_4_LOOP(signed) \
CHECK_EXT(4) \
RVV_VI_GENERAL_LOOP_BASE \
RVV_VI_LOOP_MASK_SKIP() \
if (rvv_vsew() == E32) { \
......@@ -939,6 +1139,7 @@ inline Dst unsigned_saturation(Src v, uint n) {
rvv_trace_vd();
#define RVV_VI_VIE_2_LOOP(signed) \
CHECK_EXT(2) \
RVV_VI_GENERAL_LOOP_BASE \
RVV_VI_LOOP_MASK_SKIP() \
if (rvv_vsew() == E16) { \
......@@ -2200,12 +2401,13 @@ T Simulator::ReadMem(int64_t addr, Instruction* instr) {
DieOrDebug();
}
#ifndef V8_COMPRESS_POINTERS // TODO(RISCV): v8:11812
// check for natural alignment
if (!FLAG_riscv_c_extension && ((addr & (sizeof(T) - 1)) != 0)) {
PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", addr,
reinterpret_cast<intptr_t>(instr));
DieOrDebug();
}
// // check for natural alignment
// if (!FLAG_riscv_c_extension && ((addr & (sizeof(T) - 1)) != 0)) {
// PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n",
// addr,
// reinterpret_cast<intptr_t>(instr));
// DieOrDebug();
// }
#endif
T* ptr = reinterpret_cast<T*>(addr);
T value = *ptr;
......@@ -2478,18 +2680,18 @@ void Simulator::SoftwareInterrupt() {
"Call to host function %s at %p "
"args %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64
" , %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64
" , %08" PRIx64 " , %08" PRIx64 " , %016" PRIx64
" , %016" PRIx64 " , %016" PRIx64 " , %016" PRIx64
" , %016" PRIx64 " , %016" PRIx64 " , %016" PRIx64
" , %016" PRIx64 " , %016" PRIx64 " , %016" PRIx64 " \n",
" , %08" PRIx64 " , %08" PRIx64 " , %016" PRIx64 " , %016" PRIx64
" , %016" PRIx64 " , %016" PRIx64 " , %016" PRIx64 " , %016" PRIx64
" , %016" PRIx64 " , %016" PRIx64 " , %016" PRIx64 " , %016" PRIx64
" \n",
ExternalReferenceTable::NameOfIsolateIndependentAddress(pc),
reinterpret_cast<void*>(FUNCTION_ADDR(target)), arg0, arg1, arg2,
arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11, arg12,
arg13, arg14, arg15, arg16, arg17, arg18, arg19);
}
ObjectPair result = target(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
arg8, arg9, arg10, arg11, arg12, arg13, arg14,
arg15, arg16, arg17, arg18, arg19);
set_register(a0, (int64_t)(result.x));
set_register(a1, (int64_t)(result.y));
}
......@@ -4552,6 +4754,28 @@ static inline T sat_sub(T x, T y, bool& sat) {
return res;
}
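// Unsigned saturating add: on wrap-around (res < x) the saturation flag is
// set and the result is clamped to the type's maximum.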
template <typename T>
T sat_addu(T x, T y, bool& sat) {
T res = x + y;
sat = false;
sat = res < x;
res |= -(res < x);
return res;
}
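// Unsigned saturating subtract: on underflow (res > x) the saturation flag is
// set and the result is clamped to zero.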
template <typename T>
T sat_subu(T x, T y, bool& sat) {
T res = x - y;
sat = false;
sat = !(res <= x);
res &= -(res <= x);
return res;
}
void Simulator::DecodeRvvIVV() {
DCHECK_EQ(instr_.InstructionBits() & (kBaseOpcodeMask | kFunct3Mask), OP_IVV);
switch (instr_.InstructionBits() & kVTypeMask) {
......@@ -4627,6 +4851,35 @@ void Simulator::DecodeRvvIVV() {
RVV_VI_LOOP_END
break;
}
case RO_V_VSSUBU_VV: {
RVV_VI_GENERAL_LOOP_BASE
bool sat = false;
switch (rvv_vsew()) {
case E8: {
VV_UPARAMS(8);
vd = sat_subu<uint8_t>(vs2, vs1, sat);
break;
}
case E16: {
VV_UPARAMS(16);
vd = sat_subu<uint16_t>(vs2, vs1, sat);
break;
}
case E32: {
VV_UPARAMS(32);
vd = sat_subu<uint32_t>(vs2, vs1, sat);
break;
}
default: {
VV_UPARAMS(64);
vd = sat_subu<uint64_t>(vs2, vs1, sat);
break;
}
}
set_rvv_vxsat(sat);
RVV_VI_LOOP_END
break;
}
case RO_V_VAND_VV: {
RVV_VI_VV_LOOP({ vd = vs1 & vs2; })
break;
......@@ -4735,6 +4988,42 @@ void Simulator::DecodeRvvIVV() {
RVV_VI_VV_LOOP({ vd = vs2 << vs1; })
break;
}
case RO_V_VSRL_VV:
RVV_VI_VV_ULOOP({ vd = vs2 >> vs1; })
break;
case RO_V_VSRA_VV:
RVV_VI_VV_LOOP({ vd = vs2 >> vs1; })
break;
case RO_V_VSMUL_VV: {
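// vsmul: compute the double-width signed product, add the vxrm rounding
// increment, then saturate the result back to SEW.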
RVV_VI_GENERAL_LOOP_BASE
RVV_VI_LOOP_MASK_SKIP()
if (rvv_vsew() == E8) {
VV_PARAMS(8);
int16_t result = (int16_t)vs1 * (int16_t)vs2;
result += get_round(static_cast<int>(rvv_vxrm()), result, 7);
vd = signed_saturation<int16_t, int8_t>(result, 8);
} else if (rvv_vsew() == E16) {
VV_PARAMS(16);
int32_t result = (int32_t)vs1 * (int32_t)vs2;
result += get_round(static_cast<int>(rvv_vxrm()), result, 15);
vd = signed_saturation<int32_t, int16_t>(result, 16);
} else if (rvv_vsew() == E32) {
VV_PARAMS(32);
int64_t result = (int64_t)vs1 * (int64_t)vs2;
result += get_round(static_cast<int>(rvv_vxrm()), result, 31);
vd = signed_saturation<int64_t, int32_t>(result, 32);
} else if (rvv_vsew() == E64) {
VV_PARAMS(64);
__int128_t result = (__int128_t)vs1 * (__int128_t)vs2;
result += get_round(static_cast<int>(rvv_vxrm()), result, 63);
vd = signed_saturation<__int128_t, int64_t>(result, 64);
} else {
UNREACHABLE();
}
RVV_VI_LOOP_END
rvv_trace_vd();
break;
}
case RO_V_VRGATHER_VV: {
RVV_VI_GENERAL_LOOP_BASE
CHECK_NE(rvv_vs1_reg(), rvv_vd_reg());
......@@ -4829,7 +5118,7 @@ void Simulator::DecodeRvvIVI() {
break;
}
case RO_V_VRSUB_VI: {
RVV_VI_VI_LOOP({ vd = vs2 - simm5; })
RVV_VI_VI_LOOP({ vd = simm5 - vs2; })
break;
}
case RO_V_VAND_VI: {
......@@ -4908,10 +5197,13 @@ void Simulator::DecodeRvvIVI() {
RVV_VI_LOOP_END
} break;
case RO_V_VSRL_VI:
RVV_VI_VI_LOOP({ vd = vs2 >> simm5; })
RVV_VI_VI_ULOOP({ vd = vs2 >> uimm5; })
break;
case RO_V_VSRA_VI:
RVV_VI_VI_LOOP({ vd = vs2 >> (simm5 & (rvv_sew() - 1) & 0x1f); })
break;
case RO_V_VSLL_VI:
RVV_VI_VI_LOOP({ vd = vs2 << simm5; })
RVV_VI_VI_ULOOP({ vd = vs2 << uimm5; })
break;
case RO_V_VADC_VI:
if (instr_.RvvVM()) {
......@@ -5128,7 +5420,11 @@ void Simulator::DecodeRvvIVX() {
break;
}
case RO_V_VSRL_VX: {
RVV_VI_VX_LOOP({ vd = int32_t(uint32_t(vs2) >> (rs1 & (xlen - 1))); })
RVV_VI_VX_ULOOP({ vd = (vs2 >> (rs1 & (rvv_sew() - 1))); })
break;
}
case RO_V_VSRA_VX: {
RVV_VI_VX_LOOP({ vd = ((vs2) >> (rs1 & (rvv_sew() - 1))); })
break;
}
default:
......@@ -5140,6 +5436,18 @@ void Simulator::DecodeRvvIVX() {
void Simulator::DecodeRvvMVV() {
DCHECK_EQ(instr_.InstructionBits() & (kBaseOpcodeMask | kFunct3Mask), OP_MVV);
switch (instr_.InstructionBits() & kVTypeMask) {
case RO_V_VMUL_VV: {
RVV_VI_VV_LOOP({ vd = vs2 * vs1; })
break;
}
case RO_V_VDIV_VV: {
RVV_VI_VV_LOOP({ vd = vs2 / vs1; })
break;
}
case RO_V_VDIVU_VV: {
RVV_VI_VV_LOOP({ vd = vs2 / vs1; })
break;
}
case RO_V_VWXUNARY0: {
if (rvv_vs1_reg() == 0) {
switch (rvv_vsew()) {
......@@ -5159,7 +5467,7 @@ void Simulator::DecodeRvvMVV() {
UNREACHABLE();
}
set_rvv_vstart(0);
SNPrintF(trace_buf_, "0x%ld", get_register(rd_reg()));
SNPrintF(trace_buf_, "%lx", get_register(rd_reg()));
} else {
v8::base::EmbeddedVector<char, 256> buffer;
disasm::NameConverter converter;
......@@ -5203,6 +5511,13 @@ void Simulator::DecodeRvvMVV() {
UNSUPPORTED_RISCV();
}
break;
case RO_V_VWADDU_VV:
RVV_VI_CHECK_DSS(true);
RVV_VI_VV_LOOP_WIDEN({
VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, +, +, uint);
USE(vd);
})
break;
default:
v8::base::EmbeddedVector<char, 256> buffer;
disasm::NameConverter converter;
......@@ -5249,6 +5564,27 @@ void Simulator::DecodeRvvMVX() {
UNSUPPORTED_RISCV();
}
break;
case RO_V_VDIV_VX: {
RVV_VI_VX_LOOP({ vd = vs2 / rs1; })
break;
}
case RO_V_VDIVU_VX: {
RVV_VI_VX_ULOOP({ vd = vs2 / rs1; })
break;
}
case RO_V_VMUL_VX: {
RVV_VI_VX_LOOP({ vd = vs2 * rs1; })
break;
}
case RO_V_VWADDUW_VX: {
RVV_VI_CHECK_DDS(false);
RVV_VI_VX_LOOP_WIDEN({
VI_WIDE_WVX_OP(rs1, +, uint);
USE(vd);
USE(vs2);
})
break;
}
default:
v8::base::EmbeddedVector<char, 256> buffer;
disasm::NameConverter converter;
......@@ -5298,13 +5634,13 @@ void Simulator::DecodeRvvFVV() {
if (is_invalid_fdiv(vs1, vs2)) {
this->set_fflags(kInvalidOperation);
return std::numeric_limits<double>::quiet_NaN();
} else if (vs2 == 0.0f) {
} else if (vs1 == 0.0f) {
this->set_fflags(kDivideByZero);
return (std::signbit(vs1) == std::signbit(vs2)
? std::numeric_limits<double>::infinity()
: -std::numeric_limits<double>::infinity());
} else {
return vs1 / vs2;
return vs2 / vs1;
}
};
auto alu_out = fn(vs1, vs2);
......@@ -5463,29 +5799,40 @@ void Simulator::DecodeRvvFVV() {
USE(vd);
})
break;
case VFSQRT_V:
RVV_VI_VFP_VF_LOOP({ UNIMPLEMENTED(); },
{
vd = std::sqrt(vs2);
USE(fs1);
},
{
vd = std::sqrt(vs2);
USE(fs1);
})
break;
default:
break;
}
break;
case RO_V_VMFEQ_VV: {
RVV_VI_VFP_LOOP_CMP({ UNIMPLEMENTED(); },
{ res = CompareFHelper(vs1, vs2, EQ); },
{ res = CompareFHelper(vs1, vs2, EQ); }, true)
{ res = CompareFHelper(vs2, vs1, EQ); },
{ res = CompareFHelper(vs2, vs1, EQ); }, true)
} break;
case RO_V_VMFNE_VV: {
RVV_VI_VFP_LOOP_CMP({ UNIMPLEMENTED(); },
{ res = CompareFHelper(vs1, vs2, NE); },
{ res = CompareFHelper(vs1, vs2, NE); }, true)
{ res = CompareFHelper(vs2, vs1, NE); },
{ res = CompareFHelper(vs2, vs1, NE); }, true)
} break;
case RO_V_VMFLT_VV: {
RVV_VI_VFP_LOOP_CMP({ UNIMPLEMENTED(); },
{ res = CompareFHelper(vs1, vs2, LT); },
{ res = CompareFHelper(vs1, vs2, LT); }, true)
{ res = CompareFHelper(vs2, vs1, LT); },
{ res = CompareFHelper(vs2, vs1, LT); }, true)
} break;
case RO_V_VMFLE_VV: {
RVV_VI_VFP_LOOP_CMP({ UNIMPLEMENTED(); },
{ res = CompareFHelper(vs1, vs2, LE); },
{ res = CompareFHelper(vs1, vs2, LE); }, true)
{ res = CompareFHelper(vs2, vs1, LE); },
{ res = CompareFHelper(vs2, vs1, LE); }, true)
} break;
case RO_V_VFMAX_VV: {
RVV_VI_VFP_VV_LOOP({ UNIMPLEMENTED(); },
......@@ -5493,6 +5840,13 @@ void Simulator::DecodeRvvFVV() {
{ vd = FMaxMinHelper(vs2, vs1, MaxMinKind::kMax); })
break;
}
case RO_V_VFREDMAX_VV: {
RVV_VI_VFP_VV_LOOP_REDUCTION(
{ UNIMPLEMENTED(); },
{ vd_0 = FMaxMinHelper(vd_0, vs2, MaxMinKind::kMax); },
{ vd_0 = FMaxMinHelper(vd_0, vs2, MaxMinKind::kMax); })
break;
}
case RO_V_VFMIN_VV: {
RVV_VI_VFP_VV_LOOP({ UNIMPLEMENTED(); },
{ vd = FMaxMinHelper(vs2, vs1, MaxMinKind::kMin); },
......
......@@ -69,60 +69,6 @@ T Nabs(T a) {
return a < 0 ? a : -a;
}
template <uint64_t N>
struct type_usew_t;
template <>
struct type_usew_t<8> {
using type = uint8_t;
};
template <>
struct type_usew_t<16> {
using type = uint16_t;
};
template <>
struct type_usew_t<32> {
using type = uint32_t;
};
template <>
struct type_usew_t<64> {
using type = uint64_t;
};
template <>
struct type_usew_t<128> {
using type = __uint128_t;
};
template <uint64_t N>
struct type_sew_t;
template <>
struct type_sew_t<8> {
using type = int8_t;
};
template <>
struct type_sew_t<16> {
using type = int16_t;
};
template <>
struct type_sew_t<32> {
using type = int32_t;
};
template <>
struct type_sew_t<64> {
using type = int64_t;
};
template <>
struct type_sew_t<128> {
using type = __int128_t;
};
#if defined(USE_SIMULATOR)
// Running with a simulator.
......@@ -446,6 +392,13 @@ class Simulator : public SimulatorBase {
inline uint64_t rvv_vlenb() const { return vlenb_; }
inline uint32_t rvv_zimm() const { return instr_.Rvvzimm(); }
inline uint32_t rvv_vlmul() const { return (rvv_vtype() & 0x7); }
inline float rvv_vflmul() const {
if ((rvv_vtype() & 0b100) == 0) {
return static_cast<float>(0x1 << (rvv_vtype() & 0x7));
} else {
return 1.0 / static_cast<float>(0x1 << (4 - rvv_vtype() & 0x7));
}
}
inline uint32_t rvv_vsew() const { return ((rvv_vtype() >> 3) & 0x7); }
inline const char* rvv_sew_s() const {
......@@ -470,7 +423,7 @@ class Simulator : public SimulatorBase {
RVV_LMUL(CAST_VLMUL)
default:
return "unknown";
#undef CAST_VSEW
#undef CAST_VLMUL
}
}
......@@ -726,6 +679,60 @@ class Simulator : public SimulatorBase {
// PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
// HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
// MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
template <uint64_t N>
struct type_usew_t;
template <>
struct type_usew_t<8> {
using type = uint8_t;
};
template <>
struct type_usew_t<16> {
using type = uint16_t;
};
template <>
struct type_usew_t<32> {
using type = uint32_t;
};
template <>
struct type_usew_t<64> {
using type = uint64_t;
};
template <>
struct type_usew_t<128> {
using type = __uint128_t;
};
template <uint64_t N>
struct type_sew_t;
template <>
struct type_sew_t<8> {
using type = int8_t;
};
template <>
struct type_sew_t<16> {
using type = int16_t;
};
template <>
struct type_sew_t<32> {
using type = int32_t;
};
template <>
struct type_sew_t<64> {
using type = int64_t;
};
template <>
struct type_sew_t<128> {
using type = __int128_t;
};
#define VV_PARAMS(x) \
type_sew_t<x>::type& vd = \
Rvvelt<type_sew_t<x>::type>(rvv_vd_reg(), i, true); \
......@@ -806,7 +813,7 @@ class Simulator : public SimulatorBase {
inline void rvv_trace_vd() {
if (::v8::internal::FLAG_trace_sim) {
__int128_t value = Vregister_[rvv_vd_reg()];
SNPrintF(trace_buf_, "0x%016" PRIx64 "%016" PRIx64 " (%" PRId64 ")",
SNPrintF(trace_buf_, "%016" PRIx64 "%016" PRIx64 " (%" PRId64 ")",
*(reinterpret_cast<int64_t*>(&value) + 1),
*reinterpret_cast<int64_t*>(&value), icount_);
}
......
......@@ -1691,21 +1691,149 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
LoadType type,
LoadTransformationKind transform,
uint32_t* protected_load_pc) {
bailout(kSimd, "load extend and load splat unimplemented");
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
MemOperand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
VRegister dst_v = dst.fp().toV();
*protected_load_pc = pc_offset();
MachineType memtype = type.mem_type();
if (transform == LoadTransformationKind::kExtend) {
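// Load-extend: load 64 bits, broadcast them into kSimd128ScratchReg at E64,
// then widen each element into dst_v with vsext.vf2/vzext.vf2 at the
// destination SEW.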
Ld(scratch, src_op);
if (memtype == MachineType::Int8()) {
VU.set(kScratchReg, E64, m1);
vmv_vx(kSimd128ScratchReg, scratch);
VU.set(kScratchReg, E16, m1);
vsext_vf2(dst_v, kSimd128ScratchReg);
} else if (memtype == MachineType::Uint8()) {
VU.set(kScratchReg, E64, m1);
vmv_vx(kSimd128ScratchReg, scratch);
VU.set(kScratchReg, E16, m1);
vzext_vf2(dst_v, kSimd128ScratchReg);
} else if (memtype == MachineType::Int16()) {
VU.set(kScratchReg, E64, m1);
vmv_vx(kSimd128ScratchReg, scratch);
VU.set(kScratchReg, E32, m1);
vsext_vf2(dst_v, kSimd128ScratchReg);
} else if (memtype == MachineType::Uint16()) {
VU.set(kScratchReg, E64, m1);
vmv_vx(kSimd128ScratchReg, scratch);
VU.set(kScratchReg, E32, m1);
vzext_vf2(dst_v, kSimd128ScratchReg);
} else if (memtype == MachineType::Int32()) {
VU.set(kScratchReg, E64, m1);
vmv_vx(kSimd128ScratchReg, scratch);
vsext_vf2(dst_v, kSimd128ScratchReg);
} else if (memtype == MachineType::Uint32()) {
VU.set(kScratchReg, E64, m1);
vmv_vx(kSimd128ScratchReg, scratch);
vzext_vf2(dst_v, kSimd128ScratchReg);
}
} else if (transform == LoadTransformationKind::kZeroExtend) {
vxor_vv(dst_v, dst_v, dst_v);
if (memtype == MachineType::Int32()) {
VU.set(kScratchReg, E32, m1);
Lwu(scratch, src_op);
li(kScratchReg, 0x1 << 0);
vmv_sx(v0, kScratchReg);
vmerge_vx(dst_v, scratch, dst_v);
} else {
DCHECK_EQ(MachineType::Int64(), memtype);
VU.set(kScratchReg, E64, m1);
Ld(scratch, src_op);
li(kScratchReg, 0x1 << 0);
vmv_sx(v0, kScratchReg);
vmerge_vx(dst_v, scratch, dst_v);
}
} else {
DCHECK_EQ(LoadTransformationKind::kSplat, transform);
if (memtype == MachineType::Int8()) {
VU.set(kScratchReg, E8, m1);
Lb(scratch, src_op);
vmv_vx(dst_v, scratch);
} else if (memtype == MachineType::Int16()) {
VU.set(kScratchReg, E16, m1);
Lh(scratch, src_op);
vmv_vx(dst_v, scratch);
} else if (memtype == MachineType::Int32()) {
VU.set(kScratchReg, E32, m1);
Lw(scratch, src_op);
vmv_vx(dst_v, scratch);
} else if (memtype == MachineType::Int64()) {
VU.set(kScratchReg, E64, m1);
Ld(scratch, src_op);
vmv_vx(dst_v, scratch);
}
}
}
void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
Register addr, Register offset_reg,
uintptr_t offset_imm, LoadType type,
uint8_t laneidx, uint32_t* protected_load_pc) {
bailout(kSimd, "loadlane");
MemOperand src_op = liftoff::GetMemOp(this, addr, offset_reg, offset_imm);
MachineType mem_type = type.mem_type();
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
*protected_load_pc = pc_offset();
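// Load the scalar, put a one-hot mask for laneidx into v0, then vmerge_vx
// writes the value into just that lane of dst.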
if (mem_type == MachineType::Int8()) {
Lbu(scratch, src_op);
VU.set(kScratchReg, E8, m1);
li(kScratchReg, 0x1 << laneidx);
vmv_sx(v0, kScratchReg);
vmerge_vx(dst.fp().toV(), scratch, dst.fp().toV());
} else if (mem_type == MachineType::Int16()) {
Lhu(scratch, src_op);
VU.set(kScratchReg, E16, m1);
li(kScratchReg, 0x1 << laneidx);
vmv_sx(v0, kScratchReg);
vmerge_vx(dst.fp().toV(), scratch, dst.fp().toV());
} else if (mem_type == MachineType::Int32()) {
Lwu(scratch, src_op);
VU.set(kScratchReg, E32, m1);
li(kScratchReg, 0x1 << laneidx);
vmv_sx(v0, kScratchReg);
vmerge_vx(dst.fp().toV(), scratch, dst.fp().toV());
} else if (mem_type == MachineType::Int64()) {
Ld(scratch, src_op);
VU.set(kScratchReg, E64, m1);
li(kScratchReg, 0x1 << laneidx);
vmv_sx(v0, kScratchReg);
vmerge_vx(dst.fp().toV(), scratch, dst.fp().toV());
} else {
UNREACHABLE();
}
}
void LiftoffAssembler::StoreLane(Register dst, Register offset,
uintptr_t offset_imm, LiftoffRegister src,
StoreType type, uint8_t lane,
uint32_t* protected_store_pc) {
bailout(kSimd, "StoreLane");
MemOperand dst_op = liftoff::GetMemOp(this, dst, offset, offset_imm);
if (protected_store_pc) *protected_store_pc = pc_offset();
MachineRepresentation rep = type.mem_rep();
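// Slide the requested lane down to element 0, move it to a GPR with vmv_xs,
// then store it with the matching width.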
if (rep == MachineRepresentation::kWord8) {
VU.set(kScratchReg, E8, m1);
vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), lane);
vmv_xs(kScratchReg, kSimd128ScratchReg);
Sb(kScratchReg, dst_op);
} else if (rep == MachineRepresentation::kWord16) {
VU.set(kScratchReg, E16, m1);
vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), lane);
vmv_xs(kScratchReg, kSimd128ScratchReg);
Sh(kScratchReg, dst_op);
} else if (rep == MachineRepresentation::kWord32) {
VU.set(kScratchReg, E32, m1);
vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), lane);
vmv_xs(kScratchReg, kSimd128ScratchReg);
Sw(kScratchReg, dst_op);
} else {
DCHECK_EQ(MachineRepresentation::kWord64, rep);
VU.set(kScratchReg, E64, m1);
vslidedown_vi(kSimd128ScratchReg, src.fp().toV(), lane);
vmv_xs(kScratchReg, kSimd128ScratchReg);
Sd(kScratchReg, dst_op);
}
}
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
......@@ -1815,31 +1943,97 @@ void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
vmv_vx(dst.fp().toV(), kScratchReg);
}
#define SIMD_BINOP(name1, name2) \
void LiftoffAssembler::emit_##name1##_extmul_low_##name2( \
LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2) { \
bailout(kSimd, "emit_" #name1 "_extmul_low_" #name2); \
} \
void LiftoffAssembler::emit_##name1##_extmul_high_##name2( \
LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2) { \
bailout(kSimd, "emit_" #name1 "_extmul_high_" #name2); \
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E32, m1);
vmul_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
SIMD_BINOP(i16x8, i8x16_s)
SIMD_BINOP(i16x8, i8x16_u)
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E32, m1);
vmul_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
SIMD_BINOP(i32x4, i16x8_s)
SIMD_BINOP(i32x4, i16x8_u)
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E32, m1);
vmulh_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
SIMD_BINOP(i64x2, i32x4_s)
SIMD_BINOP(i64x2, i32x4_u)
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E32, m1);
vmulhu_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E16, m1);
vmul_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E16, m1);
vmul_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E16, m1);
vmulh_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E16, m1);
vmulhu_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E8, m1);
vmul_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E8, m1);
vmul_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E8, m1);
vmulh_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
VU.set(kScratchReg, E8, m1);
vmulhu_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
#undef SIMD_BINOP
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8_q15mulr_sat_s");
VU.set(kScratchReg, E16, m1);
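// vsmul_vv is the RVV fixed-point saturating multiply ((a * b) >> 15 at E16,
// rounded according to vxrm), which maps onto i16x8.q15mulr_sat_s.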
vsmul_vv(dst.fp().toV(), src1.fp().toV(), src2.fp().toV());
}
void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
......@@ -1898,7 +2092,7 @@ void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
WasmRvvGtU(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E16, m1);
WasmRvvGtU(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV(), E8, m1);
}
void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1990,7 +2184,7 @@ void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E32, m1);
vmflt_vv(v0, rhs.fp().toV(), lhs.fp().toV());
vmflt_vv(v0, lhs.fp().toV(), rhs.fp().toV());
vmv_vx(dst.fp().toV(), zero_reg);
vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
}
......@@ -1998,7 +2192,7 @@ void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E32, m1);
vmfle_vv(v0, rhs.fp().toV(), lhs.fp().toV());
vmfle_vv(v0, lhs.fp().toV(), rhs.fp().toV());
vmv_vx(dst.fp().toV(), zero_reg);
vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
}
......@@ -2055,7 +2249,7 @@ void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E64, m1);
vmflt_vv(v0, rhs.fp().toV(), lhs.fp().toV());
vmflt_vv(v0, lhs.fp().toV(), rhs.fp().toV());
vmv_vx(dst.fp().toV(), zero_reg);
vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
}
......@@ -2063,7 +2257,7 @@ void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E64, m1);
vmfle_vv(v0, rhs.fp().toV(), lhs.fp().toV());
vmfle_vv(v0, lhs.fp().toV(), rhs.fp().toV());
vmv_vx(dst.fp().toV(), zero_reg);
vmerge_vi(dst.fp().toV(), -1, dst.fp().toV());
}
......@@ -2158,6 +2352,7 @@ void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E8, m1);
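// Wasm shift counts are taken modulo the lane width, hence the andi mask on
// the run-time shift amount (and the "% width" on the immediate forms below).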
andi(rhs.gp(), rhs.gp(), 8 - 1);
vsll_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
......@@ -2165,29 +2360,35 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
DCHECK(is_uint5(rhs));
VU.set(kScratchReg, E8, m1);
vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs);
vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 8);
}
void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i8x16_shr_s");
VU.set(kScratchReg, E8, m1);
andi(rhs.gp(), rhs.gp(), 8 - 1);
vsra_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
bailout(kSimd, "emit_i8x16_shri_s");
VU.set(kScratchReg, E8, m1);
vsra_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 8);
}
void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i8x16_shr_u");
VU.set(kScratchReg, E8, m1);
andi(rhs.gp(), rhs.gp(), 8 - 1);
vsrl_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
bailout(kSimd, "emit_i8x16_shri_u");
VU.set(kScratchReg, E8, m1);
vsrl_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 8);
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -2289,36 +2490,43 @@ void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E16, m1);
andi(rhs.gp(), rhs.gp(), 16 - 1);
vsll_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
DCHECK(is_uint5(rhs));
VU.set(kScratchReg, E16, m1);
vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs);
vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 16);
}
void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_shr_s");
VU.set(kScratchReg, E16, m1);
andi(rhs.gp(), rhs.gp(), 16 - 1);
vsra_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
bailout(kSimd, "emit_i16x8_shri_s");
VU.set(kScratchReg, E16, m1);
vsra_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 16);
}
void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_shr_u");
VU.set(kScratchReg, E16, m1);
andi(rhs.gp(), rhs.gp(), 16 - 1);
vsrl_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
bailout(kSimd, "emit_i16x8_shri_u");
DCHECK(is_uint5(rhs));
VU.set(kScratchReg, E16, m1);
vsrl_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 16);
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -2330,13 +2538,15 @@ void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_add_sat_s");
VU.set(kScratchReg, E16, m1);
vsadd_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
}
void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_add_sat_u");
VU.set(kScratchReg, E16, m1);
vsaddu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
}
void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -2348,18 +2558,21 @@ void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_sub_sat_s");
VU.set(kScratchReg, E16, m1);
vssub_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
}
void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_sub_sat_u");
VU.set(kScratchReg, E16, m1);
vssubu_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
}
void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_mul");
VU.set(kScratchReg, E16, m1);
vmul_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
}
void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
......@@ -2420,15 +2633,16 @@ void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E32, m1);
andi(rhs.gp(), rhs.gp(), 32 - 1);
vsll_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
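// vsll_vi takes a 5-bit immediate, so fall back to the register form when the
// masked shift count does not fit.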
if (is_uint5(rhs)) {
vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs);
if (is_uint5(rhs % 32)) {
vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 32);
} else {
li(kScratchReg, rhs);
li(kScratchReg, rhs % 32);
vsll_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
}
}
......@@ -2436,23 +2650,39 @@ void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i32x4_shr_s");
VU.set(kScratchReg, E32, m1);
andi(rhs.gp(), rhs.gp(), 32 - 1);
vsra_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
bailout(kSimd, "emit_i32x4_shri_s");
VU.set(kScratchReg, E32, m1);
if (is_uint5(rhs % 32)) {
vsra_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 32);
} else {
li(kScratchReg, rhs % 32);
vsra_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
}
}
void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i32x4_shr_u");
VU.set(kScratchReg, E32, m1);
andi(rhs.gp(), rhs.gp(), 32 - 1);
vsrl_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
bailout(kSimd, "emit_i32x4_shri_u");
VU.set(kScratchReg, E32, m1);
if (is_uint5(rhs % 32)) {
vsrl_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 32);
} else {
li(kScratchReg, rhs % 32);
vsrl_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
}
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -2469,7 +2699,8 @@ void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i32x4_mul");
VU.set(kScratchReg, E32, m1);
vmul_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
}
void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
......@@ -2528,16 +2759,17 @@ void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E64, m1);
andi(rhs.gp(), rhs.gp(), 64 - 1);
vsll_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
VU.set(kScratchReg, E64, m1);
if (is_uint5(rhs)) {
vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs);
if (is_uint5(rhs % 64)) {
vsll_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 64);
} else {
li(kScratchReg, rhs);
li(kScratchReg, rhs % 64);
vsll_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
}
}
......@@ -2545,23 +2777,39 @@ void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i64x2_shr_s");
VU.set(kScratchReg, E64, m1);
andi(rhs.gp(), rhs.gp(), 64 - 1);
vsra_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
bailout(kSimd, "emit_i64x2_shri_s");
VU.set(kScratchReg, E64, m1);
if (is_uint5(rhs % 64)) {
vsra_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 64);
} else {
li(kScratchReg, rhs % 64);
vsra_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
}
}
void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i64x2_shr_u");
VU.set(kScratchReg, E64, m1);
andi(rhs.gp(), rhs.gp(), 64 - 1);
vsrl_vx(dst.fp().toV(), lhs.fp().toV(), rhs.gp());
}
void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
bailout(kSimd, "emit_i64x2_shri_u");
VU.set(kScratchReg, E64, m1);
if (is_uint5(rhs % 64)) {
vsrl_vi(dst.fp().toV(), lhs.fp().toV(), rhs % 64);
} else {
li(kScratchReg, rhs % 64);
vsrl_vx(dst.fp().toV(), lhs.fp().toV(), kScratchReg);
}
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -2578,7 +2826,8 @@ void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i64x2_mul");
VU.set(kScratchReg, E64, m1);
vmul_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
}
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
......@@ -2595,7 +2844,8 @@ void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_f32x4_sqrt");
VU.set(kScratchReg, E32, m1);
vfsqrt_v(dst.fp().toV(), src.fp().toV());
}
bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
......@@ -2638,13 +2888,13 @@ void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E32, m1);
VU.set(RoundingMode::RTZ);
vfmul_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
vfmul_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
}
void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E32, m1);
vfdiv_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
vfdiv_vv(dst.fp().toV(), lhs.fp().toV(), rhs.fp().toV());
}
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -2675,12 +2925,18 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_f32x4_pmin");
VU.set(kScratchReg, E32, m1);
// b < a ? b : a
vmflt_vv(v0, rhs.fp().toV(), lhs.fp().toV());
vmerge_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
}
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_f32x4_pmax");
VU.set(kScratchReg, E32, m1);
// a < b ? b : a
vmflt_vv(v0, lhs.fp().toV(), rhs.fp().toV());
vmerge_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
}
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
......@@ -2697,7 +2953,8 @@ void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_f64x2_sqrt");
VU.set(kScratchReg, E64, m1);
vfsqrt_v(dst.fp().toV(), src.fp().toV());
}
bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
......@@ -2765,7 +3022,6 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
VU.set(kScratchReg, E64, m1);
const int64_t kNaN = 0x7ff8000000000000L;
VU.set(kScratchReg, E32, m1);
vmfeq_vv(v0, lhs.fp().toV(), lhs.fp().toV());
vmfeq_vv(kSimd128ScratchReg, rhs.fp().toV(), rhs.fp().toV());
vand_vv(v0, v0, kSimd128ScratchReg);
......@@ -2777,12 +3033,18 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_f64x2_pmin");
VU.set(kScratchReg, E64, m1);
// b < a ? b : a
vmflt_vv(v0, rhs.fp().toV(), lhs.fp().toV());
vmerge_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
}
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_f64x2_pmax");
VU.set(kScratchReg, E64, m1);
// a < b ? b : a
vmflt_vv(v0, lhs.fp().toV(), rhs.fp().toV());
vmerge_vv(dst.fp().toV(), rhs.fp().toV(), lhs.fp().toV());
}
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
......@@ -2928,19 +3190,35 @@ void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i8x16_rounding_average_u");
VU.set(kScratchReg, E8, m1);
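// Rounding average: widen-add the two operands to 16 bits, add 1 for
// rounding, divide by 2 in the widened type, then narrow back with vnclipu.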
vwaddu_vv(kSimd128ScratchReg, lhs.fp().toV(), rhs.fp().toV());
li(kScratchReg, 1);
vwaddu_wx(kSimd128ScratchReg3, kSimd128ScratchReg, kScratchReg);
li(kScratchReg, 2);
VU.set(kScratchReg2, E16, m2);
vdivu_vx(kSimd128ScratchReg3, kSimd128ScratchReg3, kScratchReg);
VU.set(kScratchReg2, E8, m1);
vnclipu_vi(dst.fp().toV(), kSimd128ScratchReg3, 0);
}
void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_i16x8_rounding_average_u");
VU.set(kScratchReg2, E16, m1);
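// Same widen, add 1, halve, narrow sequence as the i8x16 variant, at E16/E32.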
vwaddu_vv(kSimd128ScratchReg, lhs.fp().toV(), rhs.fp().toV());
li(kScratchReg, 1);
vwaddu_wx(kSimd128ScratchReg3, kSimd128ScratchReg, kScratchReg);
li(kScratchReg, 2);
VU.set(kScratchReg2, E32, m2);
vdivu_vx(kSimd128ScratchReg3, kSimd128ScratchReg3, kScratchReg);
VU.set(kScratchReg2, E16, m1);
vnclipu_vi(dst.fp().toV(), kSimd128ScratchReg3, 0);
}
void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
LiftoffRegister src) {
VU.set(kScratchReg, E8, m1);
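// abs: copy src into dst, mark the negative lanes in v0 with vmslt, then
// negate only those lanes.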
vmv_vx(kSimd128RegZero, zero_reg);
vmv_vv(dst.fp().toV(), src.fp().toV());
vmslt_vv(v0, src.fp().toV(), kSimd128RegZero);
vneg_vv(dst.fp().toV(), src.fp().toV(), MaskType::Mask);
}
......@@ -2949,6 +3227,7 @@ void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
LiftoffRegister src) {
VU.set(kScratchReg, E16, m1);
vmv_vx(kSimd128RegZero, zero_reg);
vmv_vv(dst.fp().toV(), src.fp().toV());
vmslt_vv(v0, src.fp().toV(), kSimd128RegZero);
vneg_vv(dst.fp().toV(), src.fp().toV(), MaskType::Mask);
}
......@@ -2957,6 +3236,7 @@ void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
LiftoffRegister src) {
VU.set(kScratchReg, E64, m1);
vmv_vx(kSimd128RegZero, zero_reg);
vmv_vv(dst.fp().toV(), src.fp().toV());
vmslt_vv(v0, src.fp().toV(), kSimd128RegZero);
vneg_vv(dst.fp().toV(), src.fp().toV(), MaskType::Mask);
}
......@@ -2987,66 +3267,85 @@ void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
vmv_vx(kSimd128RegZero, zero_reg);
vmv_vv(dst.fp().toV(), src.fp().toV());
vmslt_vv(v0, src.fp().toV(), kSimd128RegZero);
vsub_vv(dst.fp().toV(), kSimd128RegZero, src.fp().toV(), Mask);
vneg_vv(dst.fp().toV(), src.fp().toV(), MaskType::Mask);
}
void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kSimd, "emit_i8x16_extract_lane_s");
VU.set(kScratchReg, E8, m1);
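// Slide the lane down to element 0 and read it with vmv_xs, which
// sign-extends to XLEN.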
vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
vmv_xs(dst.gp(), kSimd128ScratchReg);
}
void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kSimd, "emit_i8x16_extract_lane_u");
VU.set(kScratchReg, E8, m1);
vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
vmv_xs(dst.gp(), kSimd128ScratchReg);
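// vmv_xs sign-extends, so shift out the upper bits to zero-extend the lane.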
slli(dst.gp(), dst.gp(), 64 - 8);
srli(dst.gp(), dst.gp(), 64 - 8);
}
void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kSimd, "emit_i16x8_extract_lane_s");
VU.set(kScratchReg, E16, m1);
vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
vmv_xs(dst.gp(), kSimd128ScratchReg);
}
void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kSimd, "emit_i16x8_extract_lane_u");
VU.set(kScratchReg, E16, m1);
vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
vmv_xs(dst.gp(), kSimd128ScratchReg);
slli(dst.gp(), dst.gp(), 64 - 16);
srli(dst.gp(), dst.gp(), 64 - 16);
}
void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
VU.set(kScratchReg, E32, m1);
vslidedown_vi(v31, lhs.fp().toV(), imm_lane_idx);
vmv_xs(dst.gp(), v31);
vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
vmv_xs(dst.gp(), kSimd128ScratchReg);
}
void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kSimd, "emit_i64x2_extract_lane");
VU.set(kScratchReg, E64, m1);
vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
vmv_xs(dst.gp(), kSimd128ScratchReg);
}
void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kSimd, "emit_f32x4_extract_lane");
VU.set(kScratchReg, E32, m1);
vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
vfmv_fs(dst.fp(), kSimd128ScratchReg);
}
void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kSimd, "emit_f64x2_extract_lane");
VU.set(kScratchReg, E64, m1);
vslidedown_vi(kSimd128ScratchReg, lhs.fp().toV(), imm_lane_idx);
vfmv_fs(dst.fp(), kSimd128ScratchReg);
}
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
VU.set(kScratchReg, E8, m1);
VU.set(kScratchReg, E64, m1);
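// Write the one-hot lane mask into v0 at E64 so all 16 mask bits land in
// element 0, then merge the scalar at E8.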
li(kScratchReg, 0x1 << imm_lane_idx);
vmv_sx(v0, kScratchReg);
VU.set(kScratchReg, E8, m1);
vmerge_vx(dst.fp().toV(), src2.gp(), src1.fp().toV());
}
......@@ -3084,21 +3383,39 @@ void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "emit_f32x4_replace_lane");
VU.set(kScratchReg, E32, m1);
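// Move the float's bit pattern to a GPR with fmv_x_w, then merge it into the
// selected lane under the one-hot mask in v0.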
li(kScratchReg, 0x1 << imm_lane_idx);
vmv_sx(v0, kScratchReg);
fmv_x_w(kScratchReg, src2.fp());
vmerge_vx(dst.fp().toV(), kScratchReg, src1.fp().toV());
}
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "emit_f64x2_replace_lane");
VU.set(kScratchReg, E64, m1);
li(kScratchReg, 0x1 << imm_lane_idx);
vmv_sx(v0, kScratchReg);
fmv_x_d(kScratchReg, src2.fp());
vmerge_vx(dst.fp().toV(), kScratchReg, src1.fp().toV());
}
void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
Register tmp_gp,
LiftoffRegister tmp_s128,
ValueKind lane_kind) {
bailout(kSimd, "emit_s128_set_if_nan");
DoubleRegister tmp_fp = tmp_s128.fp();
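// Reduce src into a single scalar with vfredmax and test it with feq, which
// is false only for NaN; the inverted flag is then stored to memory at dst.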
vfredmax_vs(kSimd128ScratchReg, src.fp().toV(), src.fp().toV());
vfmv_fs(tmp_fp, kSimd128ScratchReg);
if (lane_kind == kF32) {
feq_s(kScratchReg, tmp_fp, tmp_fp); // scratch <- !IsNan(tmp_fp)
} else {
DCHECK_EQ(lane_kind, kF64);
feq_d(kScratchReg, tmp_fp, tmp_fp); // scratch <- !IsNan(tmp_fp)
}
not_(kScratchReg, kScratchReg);
Sw(kScratchReg, MemOperand(dst));
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
......