Commit 8d7186c6 authored by Milad Fa's avatar Milad Fa Committed by V8 LUCI CQ

PPC [liftoff]: Initiate simd binary ops

Change-Id: I61227d13bd6d9efb037ea62179c02e0571d61fa8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3870654
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#82997}
parent 38cf5793
......@@ -3678,6 +3678,67 @@ void TurboAssembler::StoreF32LE(DoubleRegister dst, const MemOperand& mem,
}
// Simd Support.
// X-macro table of single-instruction SIMD binary ops:
// V(TurboAssembler method name, PPC vector instruction implementing it).
#define SIMD_BINOP_LIST(V) \
  V(F64x2Add, xvadddp)     \
  V(F64x2Sub, xvsubdp)     \
  V(F64x2Mul, xvmuldp)     \
  V(F64x2Div, xvdivdp)     \
  V(F32x4Add, vaddfp)      \
  V(F32x4Sub, vsubfp)      \
  V(F32x4Mul, xvmulsp)     \
  V(F32x4Div, xvdivsp)     \
  V(I64x2Add, vaddudm)     \
  V(I64x2Sub, vsubudm)     \
  V(I32x4Add, vadduwm)     \
  V(I32x4Sub, vsubuwm)     \
  V(I32x4Mul, vmuluwm)     \
  V(I16x8Add, vadduhm)     \
  V(I16x8Sub, vsubuhm)     \
  V(I8x16Add, vaddubm)     \
  V(I8x16Sub, vsububm)
// Expands to a TurboAssembler method that forwards dst/src1/src2 straight
// to the underlying vector instruction.
#define EMIT_SIMD_BINOP(name, op)                                      \
  void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
                            Simd128Register src2) {                    \
    op(dst, src1, src2);                                               \
  }
// Instantiate one method per table entry above.
SIMD_BINOP_LIST(EMIT_SIMD_BINOP)
#undef EMIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
// Lane-wise i64x2 multiply. On Power10 (PPC_10_PLUS) the vmulld instruction
// does the whole job in one step. On older cores there is no doubleword
// vector multiply, so each of the two 64-bit lanes is moved into GPRs,
// multiplied with mulld, and the two products are packed back into dst with
// mtvsrdd. Requires three GPR scratches and one SIMD scratch on the
// fallback path; dst is also used as a temporary there.
void TurboAssembler::I64x2Mul(Simd128Register dst, Simd128Register src1,
                              Simd128Register src2, Register scratch1,
                              Register scratch2, Register scratch3,
                              Simd128Register scratch4) {
  constexpr int lane_width_in_bytes = 8;
  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
    vmulld(dst, src1, src2);
  } else {
    // GPR pair used by the current iteration. Iteration 0 leaves its product
    // in scratch1, iteration 1 in scratch2 (see the reassignment at the
    // bottom of the loop).
    Register scratch_1 = scratch1;
    Register scratch_2 = scratch2;
    for (int i = 0; i < 2; i++) {
      if (i > 0) {
        // Second lane: extract the doubleword at byte offset 8 so it sits in
        // the position mfvsrd reads. Clobbering dst is fine here — the final
        // result is only assembled after the loop.
        vextractd(scratch4, src1, Operand(1 * lane_width_in_bytes));
        vextractd(dst, src2, Operand(1 * lane_width_in_bytes));
        src1 = scratch4;
        src2 = dst;
      }
      // Move both lane operands to GPRs and form the low 64 bits of the
      // product.
      mfvsrd(scratch_1, src1);
      mfvsrd(scratch_2, src2);
      mulld(scratch_1, scratch_1, scratch_2);
      // Switch to fresh registers so iteration 0's product in scratch1
      // survives; iteration 1 multiplies into scratch2 using scratch3.
      scratch_1 = scratch2;
      scratch_2 = scratch3;
    }
    // Pack the two 64-bit products (scratch1, scratch2) into the result.
    mtvsrdd(dst, scratch1, scratch2);
  }
}
// Lane-wise i16x8 multiply (keeps the low 16 bits of each product). There is
// no plain halfword vector multiply, so use vmladduhm (multiply-low-add)
// with a zeroed addend.
void TurboAssembler::I16x8Mul(Simd128Register dst, Simd128Register src1,
                              Simd128Register src2) {
  // XOR-ing a register with itself yields the zero vector used as addend.
  vxor(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
  vmladduhm(dst, src1, src2, kSimd128RegZero);
}
void TurboAssembler::LoadSimd128(Simd128Register dst, const MemOperand& mem,
Register scratch) {
GenerateMemoryOperationRR(dst, mem, lxvx);
......
......@@ -1080,6 +1080,32 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Register scratch2);
// Simd Support.
// X-macro list of SIMD binary ops declared below via PROTOTYPE_SIMD_BINOP.
// Implementations live in the .cc file: most expand from a matching macro
// table of one-instruction ops, while the composite ones (e.g. I16x8Mul)
// are hand-written there.
#define SIMD_BINOP_LIST(V) \
  V(F64x2Add)              \
  V(F64x2Sub)              \
  V(F64x2Mul)              \
  V(F64x2Div)              \
  V(F32x4Add)              \
  V(F32x4Sub)              \
  V(F32x4Mul)              \
  V(F32x4Div)              \
  V(I64x2Add)              \
  V(I64x2Sub)              \
  V(I32x4Add)              \
  V(I32x4Sub)              \
  V(I32x4Mul)              \
  V(I16x8Add)              \
  V(I16x8Sub)              \
  V(I16x8Mul)              \
  V(I8x16Add)              \
  V(I8x16Sub)
// Declares: void <name>(dst, src1, src2); for each entry in the list above.
#define PROTOTYPE_SIMD_BINOP(name) \
  void name(Simd128Register dst, Simd128Register src1, Simd128Register src2);
SIMD_BINOP_LIST(PROTOTYPE_SIMD_BINOP)
#undef PROTOTYPE_SIMD_BINOP
#undef SIMD_BINOP_LIST
void LoadSimd128(Simd128Register dst, const MemOperand& mem,
Register scratch);
void StoreSimd128(Simd128Register src, const MemOperand& mem,
......@@ -1132,6 +1158,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
Register src2, uint8_t imm_lane_idx,
Simd128Register scratch);
// Lane-wise i64x2 multiply; needs three GPR scratches plus one SIMD scratch
// for the pre-Power10 fallback path (see the definition in the .cc file).
// NOTE: parameter renamed scrahc2 -> scratch2 to fix the typo and match the
// definition's parameter names.
void I64x2Mul(Simd128Register dst, Simd128Register src1, Simd128Register src2,
              Register scratch1, Register scratch2, Register scratch3,
              Simd128Register scratch4);
private:
static const int kSmiShift = kSmiTagSize + kSmiShiftSize;
......
......@@ -2193,6 +2193,37 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_STORE_INTEGER_RR(stdbrx);
break;
}
// Simd Support.
#define SIMD_BINOP_LIST(V) \
V(F64x2Add) \
V(F64x2Sub) \
V(F64x2Mul) \
V(F64x2Div) \
V(F32x4Add) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(I64x2Add) \
V(I64x2Sub) \
V(I32x4Add) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I16x8Add) \
V(I16x8Sub) \
V(I16x8Mul) \
V(I8x16Add) \
V(I8x16Sub)
#define EMIT_SIMD_BINOP(name) \
case kPPC_##name: { \
__ name(i.OutputSimd128Register(), i.InputSimd128Register(0), \
i.InputSimd128Register(1)); \
break; \
}
SIMD_BINOP_LIST(EMIT_SIMD_BINOP)
#undef EMIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
case kPPC_F64x2Splat: {
__ F64x2Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0),
kScratchReg);
......@@ -2301,117 +2332,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kScratchSimd128Reg);
break;
}
case kPPC_F64x2Add: {
__ xvadddp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F64x2Sub: {
__ xvsubdp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F64x2Mul: {
__ xvmuldp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4Add: {
__ vaddfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4Sub: {
__ vsubfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4Mul: {
__ xvmulsp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I64x2Add: {
__ vaddudm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I64x2Sub: {
__ vsubudm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I64x2Mul: {
constexpr int lane_width_in_bytes = 8;
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Register tempReg1 = i.ToRegister(instr->TempAt(0));
Register scratch_0 = ip;
Register scratch_1 = r0;
Simd128Register dst = i.OutputSimd128Register();
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
__ vmulld(dst, src0, src1);
} else {
for (int i = 0; i < 2; i++) {
if (i > 0) {
__ vextractd(kScratchSimd128Reg, src0,
Operand(1 * lane_width_in_bytes));
__ vextractd(kScratchSimd128Reg2, src1,
Operand(1 * lane_width_in_bytes));
src0 = kScratchSimd128Reg;
src1 = kScratchSimd128Reg2;
}
__ mfvsrd(scratch_0, src0);
__ mfvsrd(scratch_1, src1);
__ mulld(scratch_0, scratch_0, scratch_1);
scratch_0 = r0;
scratch_1 = tempReg1;
}
__ mtvsrdd(dst, ip, r0);
}
break;
}
case kPPC_I32x4Add: {
__ vadduwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I32x4Sub: {
__ vsubuwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I32x4Mul: {
__ vmuluwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8Add: {
__ vadduhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8Sub: {
__ vsubuhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I16x8Mul: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register dst = i.OutputSimd128Register();
__ vxor(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
__ vmladduhm(dst, src0, src1, kSimd128RegZero);
break;
}
case kPPC_I8x16Add: {
__ vaddubm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_I8x16Sub: {
__ vsububm(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ I64x2Mul(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), ip, r0,
i.ToRegister(instr->TempAt(0)), kScratchSimd128Reg);
break;
}
case kPPC_I32x4MinS: {
......@@ -3146,11 +3070,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vandc(dst, src, i.InputSimd128Register(1));
break;
}
case kPPC_F64x2Div: {
__ xvdivdp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
#define F64X2_MIN_MAX_NAN(result) \
__ xvcmpeqdp(kScratchSimd128Reg2, i.InputSimd128Register(0), \
i.InputSimd128Register(0)); \
......@@ -3177,11 +3096,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
#undef F64X2_MIN_MAX_NAN
case kPPC_F32x4Div: {
__ xvdivsp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kPPC_F32x4Min: {
__ vminfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......
......@@ -1764,6 +1764,35 @@ bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
return false;
}
// X-macro table mapping each Liftoff emit_<op> to the TurboAssembler SIMD
// helper that implements it: V(liftoff suffix, TurboAssembler method).
// i64x2_mul is absent — it needs extra scratch registers and is
// hand-written below.
#define SIMD_BINOP_LIST(V)   \
  V(f64x2_add, F64x2Add)     \
  V(f64x2_sub, F64x2Sub)     \
  V(f64x2_mul, F64x2Mul)     \
  V(f64x2_div, F64x2Div)     \
  V(f32x4_add, F32x4Add)     \
  V(f32x4_sub, F32x4Sub)     \
  V(f32x4_mul, F32x4Mul)     \
  V(f32x4_div, F32x4Div)     \
  V(i64x2_add, I64x2Add)     \
  V(i64x2_sub, I64x2Sub)     \
  V(i32x4_add, I32x4Add)     \
  V(i32x4_sub, I32x4Sub)     \
  V(i32x4_mul, I32x4Mul)     \
  V(i16x8_add, I16x8Add)     \
  V(i16x8_sub, I16x8Sub)     \
  V(i16x8_mul, I16x8Mul)     \
  V(i8x16_add, I8x16Add)     \
  V(i8x16_sub, I8x16Sub)
// Expands to a LiftoffAssembler method that forwards to the helper; each
// LiftoffRegister operand is viewed as a SIMD register via fp().toSimd().
#define EMIT_SIMD_BINOP(name, op)                                              \
  void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister lhs, \
                                     LiftoffRegister rhs) {                    \
    op(dst.fp().toSimd(), lhs.fp().toSimd(), rhs.fp().toSimd());               \
  }
// Instantiate one emit_* method per table entry above.
SIMD_BINOP_LIST(EMIT_SIMD_BINOP)
#undef EMIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
F64x2Splat(dst.fp().toSimd(), src.fp(), r0);
......@@ -1898,6 +1927,16 @@ void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
imm_lane_idx, kScratchSimd128Reg);
}
// Liftoff i64x2.mul: delegates to TurboAssembler::I64x2Mul, which needs
// three GPR scratches (plus kScratchSimd128Reg) on pre-Power10 hardware.
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  // TODO(miladfarca): Make use of UseScratchRegisterScope.
  // ip and r0 serve as the first two scratches; the third is an arbitrary
  // other GPR, which may hold a live Liftoff value, so it is preserved
  // across the call with push/pop.
  Register scratch = GetRegisterThatIsNotOneOf(ip, r0);
  push(scratch);
  I64x2Mul(dst.fp().toSimd(), lhs.fp().toSimd(), rhs.fp().toSimd(), ip, r0,
           scratch, kScratchSimd128Reg);
  pop(scratch);
}
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
Register offset_reg, uintptr_t offset_imm,
LoadType type,
......@@ -2006,26 +2045,6 @@ bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
return true;
}
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f64x2add");
}
void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f64x2sub");
}
void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f64x2mul");
}
void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f64x2div");
}
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f64x2min");
......@@ -2112,26 +2131,6 @@ bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
return true;
}
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f32x4add");
}
void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f32x4sub");
}
void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f32x4mul");
}
void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f32x4div");
}
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_f32x4min");
......@@ -2206,21 +2205,6 @@ void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
bailout(kSimd, "i64x2_shri_u");
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i64x2add");
}
void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i64x2sub");
}
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i64x2mul");
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
......@@ -2317,21 +2301,6 @@ void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
bailout(kSimd, "i32x4_shri_u");
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i32x4add");
}
void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i32x4sub");
}
void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i32x4mul");
}
void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......@@ -2443,22 +2412,12 @@ void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
bailout(kSimd, "i16x8_shri_u");
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i16x8add");
}
void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i16x8addsaturate_s");
}
void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i16x8sub");
}
void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......@@ -2471,11 +2430,6 @@ void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
bailout(kUnsupportedArchitecture, "emit_i16x8subsaturate_u");
}
void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i16x8mul");
}
void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......@@ -2630,11 +2584,6 @@ void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
bailout(kSimd, "i8x16_shri_u");
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i8x16add");
}
void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......@@ -2983,11 +2932,6 @@ void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
bailout(kSimd, "i64x2.abs");
}
void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i8x16sub");
}
void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment