Commit b5aa78d0 authored by Milad Farazmand, committed by Commit Bot

PPC: [wasm-simd] Implement simd ReplaceLane

Also modified simd ExtractLane to use the input lane.

Change-Id: Icc40226c1f3e001eb588e8c44570399c19582404
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2199643
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#67826}
parent 78044358
......@@ -1778,11 +1778,32 @@ void Assembler::vor(const DoubleRegister rt, const DoubleRegister ra,
emit(VOR | rt.code() * B21 | ra.code() * B16 | rb.code() * B11);
}
// Emits vxor (Vector Logical XOR, VX-form): vxor vrt, vra, vrb.
void Assembler::vxor(const DoubleRegister rt, const DoubleRegister ra,
                     const DoubleRegister rb) {
  uint32_t instr = VXOR;
  instr |= rt.code() * B21;
  instr |= ra.code() * B16;
  instr |= rb.code() * B11;
  emit(instr);
}
// Emits vnor (Vector Logical NOR, VX-form): vnor vrt, vra, vrb.
void Assembler::vnor(const DoubleRegister rt, const DoubleRegister ra,
                     const DoubleRegister rb) {
  const uint32_t operands =
      rt.code() * B21 | ra.code() * B16 | rb.code() * B11;
  emit(VNOR | operands);
}
// Emits vsro (Vector Shift Right by Octet, VX-form): vsro vrt, vra, vrb.
void Assembler::vsro(const DoubleRegister rt, const DoubleRegister ra,
                     const DoubleRegister rb) {
  const uint32_t regs = rt.code() * B21 | ra.code() * B16 | rb.code() * B11;
  emit(VSRO | regs);
}
// Emits vslo (Vector Shift Left by Octet, VX-form): vslo vrt, vra, vrb.
void Assembler::vslo(const DoubleRegister rt, const DoubleRegister ra,
                     const DoubleRegister rb) {
  uint32_t instr = VSLO;
  instr |= rt.code() * B21;
  instr |= ra.code() * B16;
  instr |= rb.code() * B11;
  emit(instr);
}
// Emits vperm (Vector Permute, VA-form): vperm vrt, vra, vrb, vrc.
// rc supplies the byte-select mask used to permute the ra:rb byte pair.
void Assembler::vperm(const DoubleRegister rt, const DoubleRegister ra,
                      const DoubleRegister rb, const DoubleRegister rc) {
  uint32_t instr = VPERM;
  instr |= rt.code() * B21;
  instr |= ra.code() * B16;
  instr |= rb.code() * B11;
  instr |= rc.code() * B6;
  emit(instr);
}
// Pseudo instructions.
void Assembler::nop(int type) {
Register reg = r0;
......
......@@ -950,10 +950,18 @@ class Assembler : public AssemblerBase {
void mfvsrd(const Register ra, const DoubleRegister r);
void mfvsrwz(const Register ra, const DoubleRegister r);
void mtvsrd(const DoubleRegister rt, const Register ra);
void vxor(const DoubleRegister rt, const DoubleRegister ra,
const DoubleRegister rb);
void vnor(const DoubleRegister rt, const DoubleRegister ra,
const DoubleRegister rb);
void vor(const DoubleRegister rt, const DoubleRegister ra,
const DoubleRegister rb);
void vsro(const DoubleRegister rt, const DoubleRegister ra,
const DoubleRegister rb);
void vslo(const DoubleRegister rt, const DoubleRegister ra,
const DoubleRegister rb);
void vperm(const DoubleRegister rt, const DoubleRegister ra,
const DoubleRegister rb, const DoubleRegister rc);
// Pseudo instructions
......
......@@ -2208,46 +2208,178 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vspltb(dst, dst, Operand(7));
break;
}
// Shifts input_register by whole lanes so that the requested lane ends up in
// the element position that the subsequent mfvsrd/mfvsrwz will read (the lane
// at starting_lane_number). Vector shift-by-octet amounts must live in a
// vector register, so the bit count is moved through ip and splatted into
// kScratchDoubleReg. Emits nothing when the lane is already in position.
// Clobbers ip and kScratchDoubleReg, and shifts input_register in place.
// Fixed: parameter name was misspelled "starting_lane_nummber".
#define SHIFT_TO_CORRECT_LANE(starting_lane_number, lane_input,     \
                              lane_width_in_bytes, input_register)  \
  int shift_bits = abs(lane_input - starting_lane_number) *         \
                   lane_width_in_bytes * kBitsPerByte;              \
  if (shift_bits > 0) {                                             \
    __ li(ip, Operand(shift_bits));                                 \
    __ mtvsrd(kScratchDoubleReg, ip);                               \
    __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));    \
    if (lane_input < starting_lane_number) {                        \
      __ vsro(input_register, input_register, kScratchDoubleReg);   \
    } else if (lane_input > starting_lane_number) {                 \
      __ vslo(input_register, input_register, kScratchDoubleReg);   \
    }                                                               \
  }
case kPPC_F64x2ExtractLane: {
// Lane index is mirrored (1 - n): element numbering here runs opposite to
// the wasm lane numbering.
int32_t lane = 1 - i.InputInt8(1);
// Rotate the requested doubleword into the position mfvsrd reads.
SHIFT_TO_CORRECT_LANE(0, lane, 8, i.InputSimd128Register(0))
__ mfvsrd(kScratchReg, i.InputSimd128Register(0));
__ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg);
break;
}
case kPPC_F32x4ExtractLane: {
// Mirrored lane index (3 - n); word lanes, 4 bytes wide.
int32_t lane = 3 - i.InputInt8(1);
SHIFT_TO_CORRECT_LANE(1, lane, 4, i.InputSimd128Register(0))
__ mfvsrwz(kScratchReg, i.InputSimd128Register(0));
__ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg);
break;
}
case kPPC_I64x2ExtractLane: {
int32_t lane = 1 - i.InputInt8(1);
SHIFT_TO_CORRECT_LANE(0, lane, 8, i.InputSimd128Register(0))
__ mfvsrd(i.OutputRegister(), i.InputSimd128Register(0));
break;
}
case kPPC_I32x4ExtractLane: {
int32_t lane = 3 - i.InputInt8(1);
SHIFT_TO_CORRECT_LANE(1, lane, 4, i.InputSimd128Register(0))
__ mfvsrwz(i.OutputRegister(), i.InputSimd128Register(0));
break;
}
case kPPC_I16x8ExtractLaneU: {
int32_t lane = 7 - i.InputInt8(1);
SHIFT_TO_CORRECT_LANE(2, lane, 2, i.InputSimd128Register(0))
// mfvsrwz moves a full word; logical shift right by 16 isolates the
// halfword, zero-extended (this is the "U" variant).
__ mfvsrwz(r0, i.InputSimd128Register(0));
__ li(ip, Operand(16));
__ srd(i.OutputRegister(), r0, ip);
break;
}
case kPPC_I16x8ExtractLaneS: {
int32_t lane = 7 - i.InputInt8(1);
SHIFT_TO_CORRECT_LANE(2, lane, 2, i.InputSimd128Register(0))
// Arithmetic shift right by 16 sign-extends the halfword ("S" variant).
__ mfvsrwz(kScratchReg, i.InputSimd128Register(0));
__ sradi(i.OutputRegister(), kScratchReg, 16);
break;
}
case kPPC_I8x16ExtractLaneU: {
int32_t lane = 15 - i.InputInt8(1);
SHIFT_TO_CORRECT_LANE(4, lane, 1, i.InputSimd128Register(0))
// Logical shift right by 24 isolates the byte, zero-extended.
__ mfvsrwz(r0, i.InputSimd128Register(0));
__ li(ip, Operand(24));
__ srd(i.OutputRegister(), r0, ip);
break;
}
case kPPC_I8x16ExtractLaneS: {
int32_t lane = 15 - i.InputInt8(1);
SHIFT_TO_CORRECT_LANE(4, lane, 1, i.InputSimd128Register(0))
// Arithmetic shift right by 24 sign-extends the byte.
__ mfvsrwz(kScratchReg, i.InputSimd128Register(0));
__ sradi(i.OutputRegister(), kScratchReg, 24);
break;
}
#undef SHIFT_TO_CORRECT_LANE
// Builds, at code-generation time, the 16-byte vperm select mask for
// replacing one lane: each mask byte is the identity index i (keep byte i of
// the first vperm source), except over the target lane, where it is
// replacement_value_byte_lane + j — i.e. it indexes into vperm's second
// source, which holds the replacement scalar. The mask is accumulated into
// two 64-bit immediates (moved into r0 and ip), spilled to a stack slot, and
// loaded into kScratchDoubleReg with lvx. Clobbers r0, ip, and
// kScratchDoubleReg; sp is restored before the macro ends.
#define GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane, \
lane_width_in_bytes) \
uint64_t mask = 0; \
for (int i = 0, j = 0; i <= kSimd128Size - 1; i++) { \
mask <<= kBitsPerByte; \
if (i >= lane * lane_width_in_bytes && \
i < lane * lane_width_in_bytes + lane_width_in_bytes) { \
mask |= replacement_value_byte_lane + j; \
j++; \
} else { \
mask |= i; \
} \
if (i == (kSimd128Size / 2) - 1) { \
__ mov(r0, Operand(mask)); \
mask = 0; \
} else if (i >= kSimd128Size - 1) { \
__ mov(ip, Operand(mask)); \
} \
} \
/* Need to maintain 16 byte alignment for lvx */ \
__ addi(sp, sp, Operand(-24)); \
__ StoreP(ip, MemOperand(sp, 0)); \
__ StoreP(r0, MemOperand(sp, 8)); \
__ li(r0, Operand(0)); \
__ lvx(kScratchDoubleReg, MemOperand(sp, r0)); \
__ addi(sp, sp, Operand(24));
case kPPC_F64x2ReplaceLane: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
// Lane index is mirrored (1 - n) relative to the wasm lane numbering.
int32_t lane = 1 - i.InputInt8(1);
// vperm mask indices >= 16 select bytes from its second source; the
// replacement doubleword occupies bytes 16..23 of the src:dst byte pair.
constexpr int replacement_value_byte_lane = 16;
constexpr int lane_width_in_bytes = 8;
GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
lane_width_in_bytes)
// Move the replacement scalar into dst, then merge src and dst through the
// permute mask (dst is clobbered before vperm reads it — src must differ).
__ MovDoubleToInt64(r0, i.InputDoubleRegister(2));
__ mtvsrd(dst, r0);
__ vperm(dst, src, dst, kScratchDoubleReg);
break;
}
case kPPC_F32x4ReplaceLane: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
int32_t lane = 3 - i.InputInt8(1);
// 20 = 16 + 4: replacement word starts 4 bytes into vperm's second source.
constexpr int replacement_value_byte_lane = 20;
constexpr int lane_width_in_bytes = 4;
GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
lane_width_in_bytes)
__ MovFloatToInt(kScratchReg, i.InputDoubleRegister(2));
__ mtvsrd(dst, kScratchReg);
__ vperm(dst, src, dst, kScratchDoubleReg);
break;
}
case kPPC_I64x2ReplaceLane: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
int32_t lane = 1 - i.InputInt8(1);
constexpr int replacement_value_byte_lane = 16;
constexpr int lane_width_in_bytes = 8;
GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
lane_width_in_bytes)
__ mtvsrd(dst, i.InputRegister(2));
__ vperm(dst, src, dst, kScratchDoubleReg);
break;
}
case kPPC_I32x4ReplaceLane: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
int32_t lane = 3 - i.InputInt8(1);
constexpr int replacement_value_byte_lane = 20;
constexpr int lane_width_in_bytes = 4;
GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
lane_width_in_bytes)
__ mtvsrd(dst, i.InputRegister(2));
__ vperm(dst, src, dst, kScratchDoubleReg);
break;
}
case kPPC_I16x8ReplaceLane: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
int32_t lane = 7 - i.InputInt8(1);
// 22 = 16 + 6: replacement halfword, 6 bytes into the second source.
constexpr int replacement_value_byte_lane = 22;
constexpr int lane_width_in_bytes = 2;
GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
lane_width_in_bytes)
__ mtvsrd(dst, i.InputRegister(2));
__ vperm(dst, src, dst, kScratchDoubleReg);
break;
}
case kPPC_I8x16ReplaceLane: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
int32_t lane = 15 - i.InputInt8(1);
// 23 = 16 + 7: replacement byte, 7 bytes into the second source.
constexpr int replacement_value_byte_lane = 23;
constexpr int lane_width_in_bytes = 1;
GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
lane_width_in_bytes)
__ mtvsrd(dst, i.InputRegister(2));
__ vperm(dst, src, dst, kScratchDoubleReg);
break;
}
#undef GENERATE_REPLACE_LANE_MASK
case kPPC_StoreCompressTagged: {
ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX);
break;
......
......@@ -192,18 +192,24 @@ namespace compiler {
V(PPC_AtomicXorInt64) \
V(PPC_F64x2Splat) \
V(PPC_F64x2ExtractLane) \
V(PPC_F64x2ReplaceLane) \
V(PPC_F32x4Splat) \
V(PPC_F32x4ExtractLane) \
V(PPC_F32x4ReplaceLane) \
V(PPC_I64x2Splat) \
V(PPC_I64x2ExtractLane) \
V(PPC_I64x2ReplaceLane) \
V(PPC_I32x4Splat) \
V(PPC_I32x4ExtractLane) \
V(PPC_I32x4ReplaceLane) \
V(PPC_I16x8Splat) \
V(PPC_I16x8ExtractLaneU) \
V(PPC_I16x8ExtractLaneS) \
V(PPC_I16x8ReplaceLane) \
V(PPC_I8x16Splat) \
V(PPC_I8x16ExtractLaneU) \
V(PPC_I8x16ExtractLaneS) \
V(PPC_I8x16ReplaceLane) \
V(PPC_StoreCompressTagged) \
V(PPC_LoadDecompressTaggedSigned) \
V(PPC_LoadDecompressTaggedPointer) \
......
......@@ -115,18 +115,24 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_CompressAny:
case kPPC_F64x2Splat:
case kPPC_F64x2ExtractLane:
case kPPC_F64x2ReplaceLane:
case kPPC_F32x4Splat:
case kPPC_F32x4ExtractLane:
case kPPC_F32x4ReplaceLane:
case kPPC_I64x2Splat:
case kPPC_I64x2ExtractLane:
case kPPC_I64x2ReplaceLane:
case kPPC_I32x4Splat:
case kPPC_I32x4ExtractLane:
case kPPC_I32x4ReplaceLane:
case kPPC_I16x8Splat:
case kPPC_I16x8ExtractLaneU:
case kPPC_I16x8ExtractLaneS:
case kPPC_I16x8ReplaceLane:
case kPPC_I8x16Splat:
case kPPC_I8x16ExtractLaneU:
case kPPC_I8x16ExtractLaneS:
case kPPC_I8x16ReplaceLane:
return kNoOpcodeFlags;
case kPPC_LoadWordS8:
......
......@@ -2135,7 +2135,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
}
SIMD_TYPES(SIMD_VISIT_SPLAT)
#undef SIMD_VISIT_SPLAT
#undef SIMD_TYPES
#define SIMD_VISIT_EXTRACT_LANE(Type, Sign) \
void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
......@@ -2153,7 +2152,17 @@ SIMD_VISIT_EXTRACT_LANE(I8x16, U)
SIMD_VISIT_EXTRACT_LANE(I8x16, S)
#undef SIMD_VISIT_EXTRACT_LANE
void InstructionSelector::VisitI32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
// Selects a kPPC_<Type>ReplaceLane instruction for every SIMD type in
// SIMD_TYPES. The lane index is a static parameter on the node's operator
// and is emitted as an immediate. The vector and replacement inputs use
// UseUniqueRegister so they stay distinct from the output register — the
// code generator writes dst before the final vperm reads the source vector.
#define SIMD_VISIT_REPLACE_LANE(Type) \
void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
PPCOperandGenerator g(this); \
int32_t lane = OpParameter<int32_t>(node->op()); \
Emit(kPPC_##Type##ReplaceLane, g.DefineAsRegister(node), \
g.UseUniqueRegister(node->InputAt(0)), g.UseImmediate(lane), \
g.UseUniqueRegister(node->InputAt(1))); \
}
SIMD_TYPES(SIMD_VISIT_REPLACE_LANE)
#undef SIMD_VISIT_REPLACE_LANE
#undef SIMD_TYPES
void InstructionSelector::VisitI32x4Add(Node* node) { UNIMPLEMENTED(); }
......@@ -2189,8 +2198,6 @@ void InstructionSelector::VisitI32x4GtU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4GeU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8ShrS(Node* node) { UNIMPLEMENTED(); }
......@@ -2251,8 +2258,6 @@ void InstructionSelector::VisitI8x16RoundingAverageU(Node* node) {
void InstructionSelector::VisitI8x16Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16AddSaturateS(Node* node) {
......@@ -2313,8 +2318,6 @@ void InstructionSelector::VisitF32x4Lt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Le(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::EmitPrepareResults(
ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
Node* node) {
......@@ -2455,8 +2458,6 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16Swizzle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
......
......@@ -358,6 +358,13 @@ void Decoder::UnknownFormat(Instruction* instr, const char* name) {
}
void Decoder::DecodeExt0(Instruction* instr) {
// Some encodings are 5-0 bits, handle those first
switch (EXT0 | (instr->BitField(5, 0))) {
case VPERM: {
Format(instr, "vperm 'Dt, 'Da, 'Db, 'Dc");
return;
}
}
switch (EXT0 | (instr->BitField(10, 0))) {
case VSPLTB: {
Format(instr, "vspltb 'Dt, 'Db, 'UIM");
......@@ -379,6 +386,18 @@ void Decoder::DecodeExt0(Instruction* instr) {
Format(instr, "vor 'Dt, 'Da, 'Db");
break;
}
case VXOR: {
Format(instr, "vxor 'Dt, 'Da, 'Db");
break;
}
case VNOR: {
Format(instr, "vnor 'Dt, 'Da, 'Db");
break;
}
case VSLO: {
Format(instr, "vslo 'Dt, 'Da, 'Db");
break;
}
}
}
......@@ -912,7 +931,7 @@ void Decoder::DecodeExt2(Instruction* instr) {
return;
}
case LVX: {
Format(instr, "lvx 'Dt, 'ra, 'rb");
Format(instr, "lvx 'Dt, 'ra, 'rb");
return;
}
#if V8_TARGET_ARCH_PPC64
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment