Commit 0c793e70 authored by Milad Fa, committed by V8 LUCI CQ

PPC [liftoff]: implement simd min/max ops

Change-Id: I064347b21de1eb8013754e715d99f13c6e59c192
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3876443
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/main@{#83085}
parent 50802793
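
The change below wires the WebAssembly SIMD min/max operations (f32x4, f64x2, and the signed/unsigned i32x4/i16x8/i8x16 variants) through the shared SIMD_BINOP_LIST / EMIT_SIMD_BINOP macros in the PPC TurboAssembler, code generator, and Liftoff assembler, replacing the hand-written switch cases and Liftoff bailouts that are removed further down. As a rough orientation only (not from the V8 sources; the register type and instruction "emitters" are stand-ins), the X-macro pattern those lists rely on expands like this:

#include <cstdio>

// Illustrative sketch -- Simd128Register and the instruction emitters are
// stubs; the point is how the list expands into one forwarding method per
// (op, instruction) pair.
struct Simd128Register { int code; };

struct TurboAssemblerSketch {
  void vminsw(Simd128Register d, Simd128Register a, Simd128Register b) {
    std::printf("vminsw v%d, v%d, v%d\n", d.code, a.code, b.code);
  }
  void vmaxuw(Simd128Register d, Simd128Register a, Simd128Register b) {
    std::printf("vmaxuw v%d, v%d, v%d\n", d.code, a.code, b.code);
  }

#define SIMD_BINOP_LIST(V) \
  V(I32x4MinS, vminsw)     \
  V(I32x4MaxU, vmaxuw)

#define EMIT_SIMD_BINOP(name, op)                      \
  void name(Simd128Register dst, Simd128Register src1, \
            Simd128Register src2) {                    \
    op(dst, src1, src2);                               \
  }
  SIMD_BINOP_LIST(EMIT_SIMD_BINOP)
#undef EMIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
};

int main() {
  TurboAssemblerSketch tasm;
  tasm.I32x4MinS({0}, {1}, {2});  // generated method forwards to vminsw
  tasm.I32x4MaxU({3}, {4}, {5});  // generated method forwards to vmaxuw
  return 0;
}
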
@@ -3688,15 +3688,29 @@ void TurboAssembler::StoreF32LE(DoubleRegister dst, const MemOperand& mem,
V(F32x4Sub, vsubfp) \
V(F32x4Mul, xvmulsp) \
V(F32x4Div, xvdivsp) \
V(F32x4Min, vminfp) \
V(F32x4Max, vmaxfp) \
V(I64x2Add, vaddudm) \
V(I64x2Sub, vsubudm) \
V(I32x4Add, vadduwm) \
V(I32x4Sub, vsubuwm) \
V(I32x4Mul, vmuluwm) \
V(I32x4MinS, vminsw) \
V(I32x4MinU, vminuw) \
V(I32x4MaxS, vmaxsw) \
V(I32x4MaxU, vmaxuw) \
V(I16x8Add, vadduhm) \
V(I16x8Sub, vsubuhm) \
V(I16x8MinS, vminsh) \
V(I16x8MinU, vminuh) \
V(I16x8MaxS, vmaxsh) \
V(I16x8MaxU, vmaxuh) \
V(I8x16Add, vaddubm) \
V(I8x16Sub, vsububm)
V(I8x16Sub, vsububm) \
V(I8x16MinS, vminsb) \
V(I8x16MinU, vminub) \
V(I8x16MaxS, vmaxsb) \
V(I8x16MaxU, vmaxub)
#define EMIT_SIMD_BINOP(name, op) \
void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
@@ -3707,39 +3721,6 @@ SIMD_BINOP_LIST(EMIT_SIMD_BINOP)
#undef EMIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
void TurboAssembler::I64x2Mul(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Register scratch1,
Register scratch2, Register scratch3,
Simd128Register scratch4) {
constexpr int lane_width_in_bytes = 8;
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
vmulld(dst, src1, src2);
} else {
Register scratch_1 = scratch1;
Register scratch_2 = scratch2;
for (int i = 0; i < 2; i++) {
if (i > 0) {
vextractd(scratch4, src1, Operand(1 * lane_width_in_bytes));
vextractd(dst, src2, Operand(1 * lane_width_in_bytes));
src1 = scratch4;
src2 = dst;
}
mfvsrd(scratch_1, src1);
mfvsrd(scratch_2, src2);
mulld(scratch_1, scratch_1, scratch_2);
scratch_1 = scratch2;
scratch_2 = scratch3;
}
mtvsrdd(dst, scratch1, scratch2);
}
}
void TurboAssembler::I16x8Mul(Simd128Register dst, Simd128Register src1,
Simd128Register src2) {
vxor(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
vmladduhm(dst, src1, src2, kSimd128RegZero);
}
void TurboAssembler::LoadSimd128(Simd128Register dst, const MemOperand& mem,
Register scratch) {
GenerateMemoryOperationRR(dst, mem, lxvx);
@@ -3958,6 +3939,63 @@ void TurboAssembler::I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
vinsertb(dst, scratch, Operand(15 - imm_lane_idx));
}
void TurboAssembler::I64x2Mul(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Register scratch1,
Register scratch2, Register scratch3,
Simd128Register scratch4) {
constexpr int lane_width_in_bytes = 8;
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
vmulld(dst, src1, src2);
} else {
Register scratch_1 = scratch1;
Register scratch_2 = scratch2;
for (int i = 0; i < 2; i++) {
if (i > 0) {
vextractd(scratch4, src1, Operand(1 * lane_width_in_bytes));
vextractd(dst, src2, Operand(1 * lane_width_in_bytes));
src1 = scratch4;
src2 = dst;
}
mfvsrd(scratch_1, src1);
mfvsrd(scratch_2, src2);
mulld(scratch_1, scratch_1, scratch_2);
scratch_1 = scratch2;
scratch_2 = scratch3;
}
mtvsrdd(dst, scratch1, scratch2);
}
}
void TurboAssembler::I16x8Mul(Simd128Register dst, Simd128Register src1,
Simd128Register src2) {
vxor(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
vmladduhm(dst, src1, src2, kSimd128RegZero);
}
#define F64X2_MIN_MAX_NAN(result) \
xvcmpeqdp(scratch2, src1, src1); \
vsel(result, src1, result, scratch2); \
xvcmpeqdp(scratch2, src2, src2); \
vsel(dst, src2, result, scratch2); \
/* Use xvmindp to turn any selected SNANs to QNANs. */ \
xvmindp(dst, dst, dst);
void TurboAssembler::F64x2Min(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Simd128Register scratch1,
Simd128Register scratch2) {
xvmindp(scratch1, src1, src2);
// We need to check if an input is NAN and preserve it.
F64X2_MIN_MAX_NAN(scratch1)
}
void TurboAssembler::F64x2Max(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Simd128Register scratch1,
Simd128Register scratch2) {
xvmaxdp(scratch1, src1, src2);
// We need to check if an input is NAN and preserve it.
F64X2_MIN_MAX_NAN(scratch1)
}
#undef F64X2_MIN_MAX_NAN
Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
Register reg4, Register reg5,
Register reg6) {
......
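
F64x2Min and F64x2Max above pair xvmindp/xvmaxdp with an explicit NaN fixup because the plain VSX min/max alone does not give the NaN propagation WebAssembly requires. Per lane, the sequence behaves roughly like the scalar sketch below (an illustrative model, not code from the commit; std::fmin stands in for xvmindp and the helper name is made up). F64x2Max is the same shape with fmax/xvmaxdp:

#include <cmath>
#include <limits>

// Hypothetical per-lane model of F64x2Min: take the min, but a NaN in either
// input wins and is quieted, matching the vsel fixups and the final
// xvmindp(dst, dst, dst) above.
double F64x2MinLaneModel(double a, double b) {
  double r = std::fmin(a, b);  // stand-in for the initial xvmindp
  if (std::isnan(a)) r = a;    // xvcmpeqdp(a, a) + vsel keeps a NaN input
  if (std::isnan(b)) r = b;    // same check for the second input
  if (std::isnan(r)) {
    r = std::numeric_limits<double>::quiet_NaN();  // quiet any SNaN
  }
  return r;
}
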
@@ -1089,16 +1089,30 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(F32x4Min) \
V(F32x4Max) \
V(I64x2Add) \
V(I64x2Sub) \
V(I32x4MinS) \
V(I32x4MinU) \
V(I32x4MaxS) \
V(I32x4MaxU) \
V(I32x4Add) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I16x8Add) \
V(I16x8Sub) \
V(I16x8Mul) \
V(I16x8MinS) \
V(I16x8MinU) \
V(I16x8MaxS) \
V(I16x8MaxU) \
V(I8x16Add) \
V(I8x16Sub)
V(I8x16Sub) \
V(I8x16MinS) \
V(I8x16MinU) \
V(I8x16MaxS) \
V(I8x16MaxU)
#define PROTOTYPE_SIMD_BINOP(name) \
void name(Simd128Register dst, Simd128Register src1, Simd128Register src2);
@@ -1161,6 +1175,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I64x2Mul(Simd128Register dst, Simd128Register src1, Simd128Register src2,
Register scratch1, Register scratch2, Register scratch3,
Simd128Register scratch4);
void F64x2Min(Simd128Register dst, Simd128Register src1, Simd128Register src2,
Simd128Register scratch1, Simd128Register scratch2);
void F64x2Max(Simd128Register dst, Simd128Register src1, Simd128Register src2,
Simd128Register scratch1, Simd128Register scratch2);
private:
static const int kSmiShift = kSmiTagSize + kSmiShiftSize;
......
@@ -2203,16 +2203,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(F32x4Min) \
V(F32x4Max) \
V(I64x2Add) \
V(I64x2Sub) \
V(I32x4Add) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
V(I32x4MinU) \
V(I32x4MaxS) \
V(I32x4MaxU) \
V(I16x8Add) \
V(I16x8Sub) \
V(I16x8Mul) \
V(I16x8MinS) \
V(I16x8MinU) \
V(I16x8MaxS) \
V(I16x8MaxU) \
V(I8x16Add) \
V(I8x16Sub)
V(I8x16Sub) \
V(I8x16MinS) \
V(I8x16MinU) \
V(I8x16MaxS) \
V(I8x16MaxU)
#define EMIT_SIMD_BINOP(name) \
case kPPC_##name: { \
@@ -2338,64 +2352,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.ToRegister(instr->TempAt(0)), kScratchSimd128Reg);
break;
}
case kPPC_I32x4MinS: {
__ vminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I32x4MinU: {
__ vminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8MinS: {
__ vminsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8MinU: {
__ vminuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I8x16MinS: {
__ vminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I8x16MinU: {
__ vminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I32x4MaxS: {
__ vmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I32x4MaxU: {
__ vmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8MaxS: {
__ vmaxsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8MaxU: {
__ vmaxuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I8x16MaxS: {
__ vmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
case kPPC_F64x2Min: {
__ F64x2Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), kScratchSimd128Reg,
kScratchSimd128Reg2);
break;
}
case kPPC_I8x16MaxU: {
__ vmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
case kPPC_F64x2Max: {
__ F64x2Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), kScratchSimd128Reg,
kScratchSimd128Reg2);
break;
}
case kPPC_F64x2Eq: {
@@ -3070,42 +3036,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vandc(dst, src, i.InputSimd128Register(1));
break;
}
#define F64X2_MIN_MAX_NAN(result) \
__ xvcmpeqdp(kScratchSimd128Reg2, i.InputSimd128Register(0), \
i.InputSimd128Register(0)); \
__ vsel(result, i.InputSimd128Register(0), result, kScratchSimd128Reg2); \
__ xvcmpeqdp(kScratchSimd128Reg2, i.InputSimd128Register(1), \
i.InputSimd128Register(1)); \
__ vsel(i.OutputSimd128Register(), i.InputSimd128Register(1), result, \
kScratchSimd128Reg2); \
/* Use xvmindp to turn any selected SNANs to QNANs. */ \
__ xvmindp(i.OutputSimd128Register(), i.OutputSimd128Register(), \
i.OutputSimd128Register());
case kPPC_F64x2Min: {
__ xvmindp(kScratchSimd128Reg, i.InputSimd128Register(0),
i.InputSimd128Register(1));
// We need to check if an input is NAN and preserve it.
F64X2_MIN_MAX_NAN(kScratchSimd128Reg)
break;
}
case kPPC_F64x2Max: {
__ xvmaxdp(kScratchSimd128Reg, i.InputSimd128Register(0),
i.InputSimd128Register(1));
// We need to check if an input is NAN and preserve it.
F64X2_MIN_MAX_NAN(kScratchSimd128Reg)
break;
}
#undef F64X2_MIN_MAX_NAN
case kPPC_F32x4Min: {
__ vminfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4Max: {
__ vmaxfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F64x2Ceil: {
__ xvrdpip(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
......
@@ -1763,25 +1763,39 @@ bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
return false;
}
#define SIMD_BINOP_LIST(V) \
V(f64x2_add, F64x2Add) \
V(f64x2_sub, F64x2Sub) \
V(f64x2_mul, F64x2Mul) \
V(f64x2_div, F64x2Div) \
V(f32x4_add, F32x4Add) \
V(f32x4_sub, F32x4Sub) \
V(f32x4_mul, F32x4Mul) \
V(f32x4_div, F32x4Div) \
V(i64x2_add, I64x2Add) \
V(i64x2_sub, I64x2Sub) \
V(i32x4_add, I32x4Add) \
V(i32x4_sub, I32x4Sub) \
V(i32x4_mul, I32x4Mul) \
V(i16x8_add, I16x8Add) \
V(i16x8_sub, I16x8Sub) \
V(i16x8_mul, I16x8Mul) \
V(i8x16_add, I8x16Add) \
V(i8x16_sub, I8x16Sub)
#define SIMD_BINOP_LIST(V) \
V(f64x2_add, F64x2Add) \
V(f64x2_sub, F64x2Sub) \
V(f64x2_mul, F64x2Mul) \
V(f64x2_div, F64x2Div) \
V(f32x4_add, F32x4Add) \
V(f32x4_sub, F32x4Sub) \
V(f32x4_mul, F32x4Mul) \
V(f32x4_div, F32x4Div) \
V(f32x4_min, F32x4Min) \
V(f32x4_max, F32x4Max) \
V(i64x2_add, I64x2Add) \
V(i64x2_sub, I64x2Sub) \
V(i32x4_add, I32x4Add) \
V(i32x4_sub, I32x4Sub) \
V(i32x4_mul, I32x4Mul) \
V(i32x4_min_s, I32x4MinS) \
V(i32x4_min_u, I32x4MinU) \
V(i32x4_max_s, I32x4MaxS) \
V(i32x4_max_u, I32x4MaxU) \
V(i16x8_add, I16x8Add) \
V(i16x8_sub, I16x8Sub) \
V(i16x8_mul, I16x8Mul) \
V(i16x8_min_s, I16x8MinS) \
V(i16x8_min_u, I16x8MinU) \
V(i16x8_max_s, I16x8MaxS) \
V(i16x8_max_u, I16x8MaxU) \
V(i8x16_add, I8x16Add) \
V(i8x16_sub, I8x16Sub) \
V(i8x16_min_s, I8x16MinS) \
V(i8x16_min_u, I8x16MinU) \
V(i8x16_max_s, I8x16MaxS) \
V(i8x16_max_u, I8x16MaxU)
#define EMIT_SIMD_BINOP(name, op) \
void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister lhs, \
@@ -1936,6 +1950,18 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
pop(scratch);
}
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
F64x2Min(dst.fp().toSimd(), lhs.fp().toSimd(), rhs.fp().toSimd(),
kScratchSimd128Reg, kScratchSimd128Reg2);
}
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
F64x2Max(dst.fp().toSimd(), lhs.fp().toSimd(), rhs.fp().toSimd(),
kScratchSimd128Reg, kScratchSimd128Reg2);
}
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
Register offset_reg, uintptr_t offset_imm,
LoadType type,
@@ -2044,16 +2070,6 @@ bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
return true;
}
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f64x2min");
}
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f64x2max");
}
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "pmin unimplemented");
@@ -2130,16 +2146,6 @@ bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
return true;
}
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f32x4min");
}
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f32x4max");
}
void LiftoffAssembler::emit_f32x4_relaxed_min(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
@@ -2300,30 +2306,6 @@ void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
bailout(kSimd, "i32x4_shri_u");
}
void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i32x4_min_s");
}
void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i32x4_min_u");
}
void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i32x4_max_s");
}
void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i32x4_max_u");
}
void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
@@ -2435,30 +2417,6 @@ void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
bailout(kUnsupportedArchitecture, "emit_i16x8addsaturate_u");
}
void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i16x8_min_s");
}
void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i16x8_min_u");
}
void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i16x8_max_s");
}
void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i16x8_max_u");
}
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i16x8.extadd_pairwise_i8x16_s");
@@ -2589,30 +2547,6 @@ void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
bailout(kUnsupportedArchitecture, "emit_i8x16addsaturate_s");
}
void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i8x16_min_s");
}
void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i8x16_min_u");
}
void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i8x16_max_s");
}
void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i8x16_max_u");
}
void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i8x16_eq");
......