Commit fd9331eb authored by Milad Fa, committed by V8 LUCI CQ

PPC [liftoff]: Implement simd extract lane ops

Change-Id: I7f618657b7cdaeb3870bd1f743c485ac58b17c56
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3782490
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/main@{#81933}
parent eded24d7
......@@ -3544,6 +3544,71 @@ void TurboAssembler::I8x16Splat(Simd128Register dst, Register src) {
vspltb(dst, dst, Operand(7));
}
// Emits the instruction sequence that moves one 8-byte lane of |src| into the
// double register |dst|.
//
// The lane index is mirrored (1 - imm_lane_idx) and scaled by the lane size to
// obtain the byte offset passed to vextractd. The extracted value travels
// through |scratch1| (vector) and |scratch2| (general purpose), both of which
// are overwritten.
void TurboAssembler::F64x2ExtractLane(DoubleRegister dst, Simd128Register src,
                                      uint8_t imm_lane_idx,
                                      Simd128Register scratch1,
                                      Register scratch2) {
  constexpr int kLaneSizeInBytes = 8;
  const int byte_offset = (1 - imm_lane_idx) * kLaneSizeInBytes;

  vextractd(scratch1, src, Operand(byte_offset));
  mfvsrd(scratch2, scratch1);
  MovInt64ToDouble(dst, scratch2);
}
// Emits the instruction sequence that moves one 4-byte lane of |src| into the
// floating-point register |dst|.
//
// The lane index is mirrored (3 - imm_lane_idx) and scaled by the lane size to
// obtain the byte offset passed to vextractuw. The value is staged in
// |scratch1| and |scratch2|; |scratch3| is handed to MovIntToFloat as its
// additional scratch register.
void TurboAssembler::F32x4ExtractLane(DoubleRegister dst, Simd128Register src,
                                      uint8_t imm_lane_idx,
                                      Simd128Register scratch1,
                                      Register scratch2, Register scratch3) {
  constexpr int kLaneSizeInBytes = 4;
  const int byte_offset = (3 - imm_lane_idx) * kLaneSizeInBytes;

  vextractuw(scratch1, src, Operand(byte_offset));
  mfvsrd(scratch2, scratch1);
  MovIntToFloat(dst, scratch2, scratch3);
}
// Emits the instruction sequence that moves one 8-byte lane of |src| into the
// general purpose register |dst|.
//
// The byte offset given to vextractd is (1 - imm_lane_idx) * 8. |scratch| is
// overwritten with the intermediate vector result.
void TurboAssembler::I64x2ExtractLane(Register dst, Simd128Register src,
                                      uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  constexpr int kLaneSizeInBytes = 8;
  const int byte_offset = (1 - imm_lane_idx) * kLaneSizeInBytes;

  vextractd(scratch, src, Operand(byte_offset));
  mfvsrd(dst, scratch);
}
// Emits the instruction sequence that moves one 4-byte lane of |src| into the
// general purpose register |dst|.
//
// The byte offset given to vextractuw is (3 - imm_lane_idx) * 4. |scratch| is
// overwritten with the intermediate vector result.
void TurboAssembler::I32x4ExtractLane(Register dst, Simd128Register src,
                                      uint8_t imm_lane_idx,
                                      Simd128Register scratch) {
  constexpr int kLaneSizeInBytes = 4;
  const int byte_offset = (3 - imm_lane_idx) * kLaneSizeInBytes;

  vextractuw(scratch, src, Operand(byte_offset));
  mfvsrd(dst, scratch);
}
// Emits the instruction sequence that moves one 2-byte lane of |src| into the
// general purpose register |dst|. No sign extension is applied here; see
// I16x8ExtractLaneS for the variant that follows up with extsh.
//
// The byte offset given to vextractuh is (7 - imm_lane_idx) * 2. |scratch| is
// overwritten with the intermediate vector result.
void TurboAssembler::I16x8ExtractLaneU(Register dst, Simd128Register src,
                                       uint8_t imm_lane_idx,
                                       Simd128Register scratch) {
  constexpr int kLaneSizeInBytes = 2;
  const int byte_offset = (7 - imm_lane_idx) * kLaneSizeInBytes;

  vextractuh(scratch, src, Operand(byte_offset));
  mfvsrd(dst, scratch);
}
// Signed counterpart of I16x8ExtractLaneU: performs the same extraction into
// |dst| and then applies extsh to |dst| in place.
void TurboAssembler::I16x8ExtractLaneS(Register dst, Simd128Register src,
                                       uint8_t imm_lane_idx,
                                       Simd128Register scratch) {
  // Step 1: reuse the unsigned extraction.
  I16x8ExtractLaneU(dst, src, imm_lane_idx, scratch);
  // Step 2: extend the halfword's sign across the register.
  extsh(dst, dst);
}
// Emits the instruction sequence that moves one byte lane of |src| into the
// general purpose register |dst|. No sign extension is applied here; see
// I8x16ExtractLaneS for the variant that follows up with extsb.
//
// Lanes are one byte wide, so the mirrored lane index (15 - imm_lane_idx) is
// used directly as the byte offset for vextractub. |scratch| is overwritten
// with the intermediate vector result.
void TurboAssembler::I8x16ExtractLaneU(Register dst, Simd128Register src,
                                       uint8_t imm_lane_idx,
                                       Simd128Register scratch) {
  const int byte_offset = 15 - imm_lane_idx;

  vextractub(scratch, src, Operand(byte_offset));
  mfvsrd(dst, scratch);
}
// Signed counterpart of I8x16ExtractLaneU: performs the same extraction into
// |dst| and then applies extsb to |dst| in place.
void TurboAssembler::I8x16ExtractLaneS(Register dst, Simd128Register src,
                                       uint8_t imm_lane_idx,
                                       Simd128Register scratch) {
  // Step 1: reuse the unsigned extraction.
  I8x16ExtractLaneU(dst, src, imm_lane_idx, scratch);
  // Step 2: extend the byte's sign across the register.
  extsb(dst, dst);
}
Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
Register reg4, Register reg5,
Register reg6) {
......
......@@ -1084,6 +1084,24 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I32x4Splat(Simd128Register dst, Register src);
void I16x8Splat(Simd128Register dst, Register src);
void I8x16Splat(Simd128Register dst, Register src);
// SIMD lane extraction helpers. Each one reads the lane selected by the
// immediate |imm_lane_idx| from the vector register |src| and writes it to the
// scalar register |dst|. The caller supplies every scratch register; the
// helpers overwrite them. The *LaneS variants call the matching *LaneU helper
// and then sign-extend |dst| (extsh / extsb).
void F64x2ExtractLane(DoubleRegister dst, Simd128Register src,
uint8_t imm_lane_idx, Simd128Register scratch1,
Register scratch2);
void F32x4ExtractLane(DoubleRegister dst, Simd128Register src,
uint8_t imm_lane_idx, Simd128Register scratch1,
Register scratch2, Register scratch3);
void I64x2ExtractLane(Register dst, Simd128Register src, uint8_t imm_lane_idx,
Simd128Register scratch);
void I32x4ExtractLane(Register dst, Simd128Register src, uint8_t imm_lane_idx,
Simd128Register scratch);
void I16x8ExtractLaneU(Register dst, Simd128Register src,
uint8_t imm_lane_idx, Simd128Register scratch);
void I16x8ExtractLaneS(Register dst, Simd128Register src,
uint8_t imm_lane_idx, Simd128Register scratch);
void I8x16ExtractLaneU(Register dst, Simd128Register src,
uint8_t imm_lane_idx, Simd128Register scratch);
void I8x16ExtractLaneS(Register dst, Simd128Register src,
uint8_t imm_lane_idx, Simd128Register scratch);
private:
static const int kSmiShift = kSmiTagSize + kSmiShiftSize;
......
......@@ -2222,61 +2222,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kPPC_F64x2ExtractLane: {
constexpr int lane_width_in_bytes = 8;
__ vextractd(kScratchSimd128Reg, i.InputSimd128Register(0),
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(kScratchReg, kScratchSimd128Reg);
__ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg);
__ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0),
i.InputInt8(1), kScratchSimd128Reg, kScratchReg);
break;
}
case kPPC_F32x4ExtractLane: {
constexpr int lane_width_in_bytes = 4;
__ vextractuw(kScratchSimd128Reg, i.InputSimd128Register(0),
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(kScratchReg, kScratchSimd128Reg);
__ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg, ip);
__ F32x4ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0),
i.InputInt8(1), kScratchSimd128Reg, kScratchReg, ip);
break;
}
case kPPC_I64x2ExtractLane: {
constexpr int lane_width_in_bytes = 8;
__ vextractd(kScratchSimd128Reg, i.InputSimd128Register(0),
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(i.OutputRegister(), kScratchSimd128Reg);
__ I64x2ExtractLane(i.OutputRegister(), i.InputSimd128Register(0),
i.InputInt8(1), kScratchSimd128Reg);
break;
}
case kPPC_I32x4ExtractLane: {
constexpr int lane_width_in_bytes = 4;
__ vextractuw(kScratchSimd128Reg, i.InputSimd128Register(0),
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(i.OutputRegister(), kScratchSimd128Reg);
__ I32x4ExtractLane(i.OutputRegister(), i.InputSimd128Register(0),
i.InputInt8(1), kScratchSimd128Reg);
break;
}
case kPPC_I16x8ExtractLaneU: {
constexpr int lane_width_in_bytes = 2;
__ vextractuh(kScratchSimd128Reg, i.InputSimd128Register(0),
Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(i.OutputRegister(), kScratchSimd128Reg);
__ I16x8ExtractLaneU(i.OutputRegister(), i.InputSimd128Register(0),
i.InputInt8(1), kScratchSimd128Reg);
break;
}
case kPPC_I16x8ExtractLaneS: {
constexpr int lane_width_in_bytes = 2;
__ vextractuh(kScratchSimd128Reg, i.InputSimd128Register(0),
Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(kScratchReg, kScratchSimd128Reg);
__ extsh(i.OutputRegister(), kScratchReg);
__ I16x8ExtractLaneS(i.OutputRegister(), i.InputSimd128Register(0),
i.InputInt8(1), kScratchSimd128Reg);
break;
}
case kPPC_I8x16ExtractLaneU: {
__ vextractub(kScratchSimd128Reg, i.InputSimd128Register(0),
Operand(15 - i.InputInt8(1)));
__ mfvsrd(i.OutputRegister(), kScratchSimd128Reg);
__ I8x16ExtractLaneU(i.OutputRegister(), i.InputSimd128Register(0),
i.InputInt8(1), kScratchSimd128Reg);
break;
}
case kPPC_I8x16ExtractLaneS: {
__ vextractub(kScratchSimd128Reg, i.InputSimd128Register(0),
Operand(15 - i.InputInt8(1)));
__ mfvsrd(kScratchReg, kScratchSimd128Reg);
__ extsb(i.OutputRegister(), kScratchReg);
__ I8x16ExtractLaneS(i.OutputRegister(), i.InputSimd128Register(0),
i.InputInt8(1), kScratchSimd128Reg);
break;
}
case kPPC_F64x2ReplaceLane: {
......
......@@ -1770,6 +1770,62 @@ void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
I8x16Splat(dst.fp().toSimd(), src.gp());
}
// Emits the f64x2 lane extraction by delegating to F64x2ExtractLane. The
// result goes to the floating-point register of |dst|; kScratchSimd128Reg and
// r0 are passed as the scratch registers.
void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  const auto simd_input = lhs.fp().toSimd();
  F64x2ExtractLane(dst.fp(), simd_input, imm_lane_idx, kScratchSimd128Reg, r0);
}
// Emits the f32x4 lane extraction by delegating to F32x4ExtractLane. The
// result goes to the floating-point register of |dst|; kScratchSimd128Reg, r0
// and ip are passed as the scratch registers.
void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  const auto simd_input = lhs.fp().toSimd();
  F32x4ExtractLane(dst.fp(), simd_input, imm_lane_idx, kScratchSimd128Reg, r0,
                   ip);
}
// Emits the i64x2 lane extraction by delegating to I64x2ExtractLane. The
// result goes to the general purpose register of |dst|; kScratchSimd128Reg is
// passed as the vector scratch register.
void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  const auto simd_input = lhs.fp().toSimd();
  I64x2ExtractLane(dst.gp(), simd_input, imm_lane_idx, kScratchSimd128Reg);
}
// Emits the i32x4 lane extraction by delegating to I32x4ExtractLane. The
// result goes to the general purpose register of |dst|; kScratchSimd128Reg is
// passed as the vector scratch register.
void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  const auto simd_input = lhs.fp().toSimd();
  I32x4ExtractLane(dst.gp(), simd_input, imm_lane_idx, kScratchSimd128Reg);
}
// Emits the unsigned i16x8 lane extraction by delegating to
// I16x8ExtractLaneU. The result goes to the general purpose register of
// |dst|; kScratchSimd128Reg is passed as the vector scratch register.
void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  const auto simd_input = lhs.fp().toSimd();
  I16x8ExtractLaneU(dst.gp(), simd_input, imm_lane_idx, kScratchSimd128Reg);
}
// Emits the signed i16x8 lane extraction by delegating to I16x8ExtractLaneS.
// The result goes to the general purpose register of |dst|;
// kScratchSimd128Reg is passed as the vector scratch register.
void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  const auto simd_input = lhs.fp().toSimd();
  I16x8ExtractLaneS(dst.gp(), simd_input, imm_lane_idx, kScratchSimd128Reg);
}
// Emits the unsigned i8x16 lane extraction by delegating to
// I8x16ExtractLaneU. The result goes to the general purpose register of
// |dst|; kScratchSimd128Reg is passed as the vector scratch register.
void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  const auto simd_input = lhs.fp().toSimd();
  I8x16ExtractLaneU(dst.gp(), simd_input, imm_lane_idx, kScratchSimd128Reg);
}
// Emits the signed i8x16 lane extraction by delegating to I8x16ExtractLaneS.
// The result goes to the general purpose register of |dst|;
// kScratchSimd128Reg is passed as the vector scratch register.
void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  const auto simd_input = lhs.fp().toSimd();
  I8x16ExtractLaneS(dst.gp(), simd_input, imm_lane_idx, kScratchSimd128Reg);
}
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
Register offset_reg, uintptr_t offset_imm,
LoadType type,
......@@ -1839,12 +1895,6 @@ void LiftoffAssembler::emit_s128_relaxed_laneselect(LiftoffRegister dst,
bailout(kRelaxedSimd, "emit_s128_relaxed_laneselect");
}
// Bailout-only version of the f64x2 extract-lane hook: it ignores all of its
// arguments and only calls bailout() with kUnsupportedArchitecture and a fixed
// message.
void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_f64x2extractlane");
}
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
......@@ -1958,12 +2008,6 @@ void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
bailout(kSimd, "f64x2.promote_low_f32x4");
}
// Bailout-only version of the f32x4 extract-lane hook: it ignores all of its
// arguments and only calls bailout() with kUnsupportedArchitecture and a fixed
// message.
void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_f32x4extractlane");
}
void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
......@@ -2062,12 +2106,6 @@ void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
bailout(kSimd, "pmax unimplemented");
}
// Bailout-only version of the i64x2 extract-lane hook: it ignores all of its
// arguments and only calls bailout() with kUnsupportedArchitecture and a fixed
// message.
void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i64x2extractlane");
}
void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
......@@ -2181,12 +2219,6 @@ void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
}
// Bailout-only version of the i32x4 extract-lane hook: it ignores all of its
// arguments and only calls bailout() with kUnsupportedArchitecture and a fixed
// message.
void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i32x4extractlane");
}
void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
......@@ -2430,12 +2462,6 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
bailout(kUnsupportedArchitecture, "emit_i16x8_max_u");
}
// Bailout-only version of the unsigned i16x8 extract-lane hook: it ignores all
// of its arguments and only calls bailout() with kUnsupportedArchitecture and
// a fixed message.
void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i16x8extractlane_u");
}
void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
......@@ -2453,12 +2479,6 @@ void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
bailout(kSimd, "i16x8.extadd_pairwise_i8x16_u");
}
// Bailout-only version of the signed i16x8 extract-lane hook: it ignores all
// of its arguments and only calls bailout() with kUnsupportedArchitecture and
// a fixed message.
void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i16x8extractlane_s");
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
......@@ -2521,12 +2541,6 @@ void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
bailout(kSimd, "i8x16.popcnt");
}
// Bailout-only version of the unsigned i8x16 extract-lane hook: it ignores all
// of its arguments and only calls bailout() with kUnsupportedArchitecture and
// a fixed message.
void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i8x16extractlane_u");
}
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
......@@ -2586,12 +2600,6 @@ void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
bailout(kSimd, "i8x16_shri_u");
}
// Bailout-only version of the signed i8x16 extract-lane hook: it ignores all
// of its arguments and only calls bailout() with kUnsupportedArchitecture and
// a fixed message.
void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
bailout(kUnsupportedArchitecture, "emit_i8x16extractlane_s");
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kUnsupportedArchitecture, "emit_i8x16add");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment