Commit 0674165d authored by Milad Fa, committed by V8 LUCI CQ

S390 [liftoff]: Implement simd swizzle and shuffle

Change-Id: I26a9c7567c36422c01c089d9f71411b3e73c681d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3457684
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#79057}
parent d0222381
......@@ -5977,6 +5977,36 @@ void TurboAssembler::S128Const(Simd128Register dst, uint64_t high, uint64_t low,
vlvgp(dst, scratch2, scratch1);
}
// Emits the instruction sequence for an i8x16 swizzle: bytes of |src1| are
// selected by the per-lane indices held in |src2| and the result is written
// to |dst|. Out-of-range indices produce a zero byte.
// Clobbers |scratch1|, |scratch2| and the general-purpose registers r0 and r1.
void TurboAssembler::I8x16Swizzle(Simd128Register dst, Simd128Register src1,
Simd128Register src2,
Simd128Register scratch1,
Simd128Register scratch2) {
// |scratch2| is overwritten below before |src1| is read, so the two must not
// be the same register.
DCHECK_NE(src1, scratch2);
// Saturate the indices to 5 bits: |scratch1| is filled with 31 and
// |scratch2| receives the unsigned minimum of each index and 31. Input
// indices greater than 31 must return 0, which the zeroed second permute
// operand below provides.
vrepi(scratch1, Operand(31), Condition(0));
vmnl(scratch2, src2, scratch1, Condition(0), Condition(0), Condition(0));
// The input needs to be reversed: pull both 64-bit halves of |src1| into
// r0/r1, byte-reverse each, and re-insert them into |dst| in swapped order.
vlgv(r0, src1, MemOperand(r0, 0), Condition(3));
vlgv(r1, src1, MemOperand(r0, 1), Condition(3));
lrvgr(r0, r0);
lrvgr(r1, r1);
vlvgp(dst, r1, r0);
// Clear |scratch1| (x ^ x == 0); it is the second permute source, so the
// saturated indices that point past the reversed input read zero bytes.
vx(scratch1, scratch1, scratch1, Condition(0), Condition(0), Condition(0));
// Permute the reversed input and the zero vector using the saturated indices.
vperm(dst, dst, scratch1, scratch2, Condition(0), Condition(0));
}
// Emits a two-input byte shuffle: the 16 byte indices, supplied as the two
// 64-bit immediates |high| and |low|, are materialized into |scratch3| and
// then used by vperm to pick bytes from |src1| and |src2| into |dst|.
// Clobbers |scratch1|, |scratch2| and |scratch3|.
void TurboAssembler::I8x16Shuffle(Simd128Register dst, Simd128Register src1,
                                  Simd128Register src2, uint64_t high,
                                  uint64_t low, Register scratch1,
                                  Register scratch2, Simd128Register scratch3) {
  mov(scratch1, Operand(low));
  mov(scratch2, Operand(high));
  // Build the index vector in the caller-provided vector scratch register.
  // It was previously written to the hard-coded kScratchDoubleReg while vperm
  // read |scratch3|, which only worked when the caller passed that register.
  vlvgp(scratch3, scratch2, scratch1);
  vperm(dst, src1, src2, scratch3, Condition(0), Condition(0));
}
// Vector LE Load and Transform instructions.
#ifdef V8_TARGET_BIG_ENDIAN
#define IS_BIG_ENDIAN true
......
......@@ -1144,8 +1144,15 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Simd128Register scratch);
void I32x4TruncSatF64x2UZero(Simd128Register dst, Simd128Register src,
Simd128Register scratch);
// Byte swizzle: selects bytes of |src1| using the per-lane indices in |src2|
// and writes them to |dst|; indices greater than 31 yield 0. |scratch1| and
// |scratch2| are clobbered, as are r0 and r1. |src1| must differ from
// |scratch2|.
void I8x16Swizzle(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Simd128Register scratch1,
Simd128Register scratch2);
// Loads the 128-bit constant described by |high| and |low| into |dst|, using
// the general-purpose registers |scratch1| and |scratch2| as temporaries.
void S128Const(Simd128Register dst, uint64_t high, uint64_t low,
Register scratch1, Register scratch2);
// Two-input byte shuffle of |src1| and |src2| into |dst|. The 16 byte indices
// are passed as the immediates |high| and |low|; |scratch1| and |scratch2|
// are general-purpose temporaries used to build the index vector and
// |scratch3| is the vector register the permute reads the indices from.
// NOTE(review): callers visible in this change pass kScratchDoubleReg as
// |scratch3|.
void I8x16Shuffle(Simd128Register dst, Simd128Register src1,
Simd128Register src2, uint64_t high, uint64_t low,
Register scratch1, Register scratch2,
Simd128Register scratch3);
void S128Select(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Simd128Register mask);
......
......@@ -2937,39 +2937,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kS390_I8x16Shuffle: {
  // The shuffle indices arrive as four 32-bit immediates (inputs 2..5) and
  // are packed into the two 64-bit halves expected by the macro-assembler.
  uint64_t low = make_uint64(i.InputUint32(3), i.InputUint32(2));
  uint64_t high = make_uint64(i.InputUint32(5), i.InputUint32(4));
  // The sequence is emitted only through the shared TurboAssembler helper;
  // the former inline mov/mov/vlvgp/vperm copy emitted the same shuffle a
  // second time and has been removed.
  __ I8x16Shuffle(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), high, low, r0, ip,
                  kScratchDoubleReg);
  break;
}
case kS390_I8x16Swizzle: {
  // Delegates to the shared TurboAssembler helper, which also checks that
  // the source register differs from the temp. The former inline copy of the
  // sequence emitted the swizzle a second time and has been removed.
  // Input 0 holds the bytes to select from, input 1 the indices; the
  // instruction's temp 0 holds the saturated indices.
  __ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg,
                  i.ToSimd128Register(instr->TempAt(0)));
  break;
}
case kS390_I64x2BitMask: {
......
......@@ -2592,7 +2592,12 @@ void LiftoffAssembler::StoreLane(Register dst, Register offset,
// Liftoff implementation of i8x16.swizzle: selects bytes of |lhs| using the
// indices in |rhs| and writes the result to |dst|.
void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  // No bailout here: the operation is implemented below, so the stale
  // "unsupported architecture" bailout that preceded it has been removed.
  Simd128Register src1 = lhs.fp();
  Simd128Register src2 = rhs.fp();
  Simd128Register dest = dst.fp();
  // The helper needs a vector temp that survives while |dest| is being
  // written, so pick one that aliases none of the operands.
  Simd128Register temp =
      GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dest, src1, src2)).fp();
  I8x16Swizzle(dest, src1, src2, kScratchDoubleReg, temp);
}
void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
......@@ -2642,7 +2647,26 @@ void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
LiftoffRegister rhs,
const uint8_t shuffle[16],
bool is_swizzle) {
bailout(kSimd, "i8x16_shuffle");
// Remap the shuffle indices to match IBM lane numbering.
// TODO(miladfarca): Put this in a function and share it with the instruction
// selector.
int max_index = 15;
int total_lane_count = 2 * kSimd128Size;
uint8_t shuffle_remapped[kSimd128Size];
for (int i = 0; i < kSimd128Size; i++) {
uint8_t current_index = shuffle[i];
shuffle_remapped[i] = (current_index <= max_index
? max_index - current_index
: total_lane_count - current_index + max_index);
}
uint64_t vals[2];
memcpy(vals, shuffle_remapped, sizeof(shuffle_remapped));
#ifdef V8_TARGET_BIG_ENDIAN
vals[0] = ByteReverse(vals[0]);
vals[1] = ByteReverse(vals[1]);
#endif
I8x16Shuffle(dst.fp(), lhs.fp(), rhs.fp(), vals[1], vals[0], r0, ip,
kScratchDoubleReg);
}
void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment