Commit 6550e154, authored by Milad Fa, committed by V8 LUCI CQ

PPC [simd]: Optimize I8x16Swizzle on codegen

Using xxbrq we can reverse the byte order of the input vector in a
single instruction, instead of round-tripping it through the stack.

xxbrq is also implemented in the simulator in this CL.

Change-Id: I4744cefeec0a4e07b41fcb0d35de08ad42e55883
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2878573
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#74439}
parent bf003aaf
......@@ -3178,24 +3178,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1),
tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)),
tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
// Saturate the indices to 5 bits. Input indices more than 31 should
// return 0.
__ xxspltib(tempFPReg2, Operand(31));
__ vminub(tempFPReg2, src1, tempFPReg2);
__ addi(sp, sp, Operand(-16));
__ stxvd(src0, MemOperand(r0, sp));
__ ldbrx(r0, MemOperand(r0, sp));
__ li(ip, Operand(8));
__ ldbrx(ip, MemOperand(ip, sp));
__ stdx(ip, MemOperand(r0, sp));
__ li(ip, Operand(8));
__ stdx(r0, MemOperand(ip, sp));
__ lxvd(kScratchSimd128Reg, MemOperand(r0, sp));
__ addi(sp, sp, Operand(16));
__ vxor(tempFPReg1, tempFPReg1, tempFPReg1);
__ vperm(dst, kScratchSimd128Reg, tempFPReg1, tempFPReg2);
__ xxspltib(tempFPReg1, Operand(31));
__ vminub(tempFPReg1, src1, tempFPReg1);
// input needs to be reversed.
__ xxbrq(dst, src0);
__ vxor(kScratchSimd128Reg, kScratchSimd128Reg, kScratchSimd128Reg);
__ vperm(dst, dst, kScratchSimd128Reg, tempFPReg1);
break;
}
case kPPC_F64x2Qfma: {
......
......@@ -3999,6 +3999,15 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
WriteDW(ra_val + rb_val, get_simd_register_by_lane<int64_t>(xs, 0));
break;
}
case XXBRQ: {
  // xxbrq: byte-reverse the quadword in the source register (RB field) and
  // write the result to the target register (RT field).
  int t = instr->RTValue();
  int b = instr->RBValue();
  // Reverse through the register's own int8 view instead of reinterpreting
  // the byte array as an __int128. The cast-based form dereferences a
  // pointer that is not guaranteed to satisfy __int128 alignment and reads
  // int8 storage through an unrelated type; a plain byte loop produces the
  // same result with neither problem.
  constexpr int kQuadwordBytes = 16;
  static_assert(sizeof(simdr_t) == kQuadwordBytes,
                "xxbrq expects a 128-bit simulated SIMD register");
  const auto xb_val = get_simd_register(b);
  simdr_t xb_val_reversed;
  for (int i = 0; i < kQuadwordBytes; i++) {
    xb_val_reversed.int8[i] = xb_val.int8[kQuadwordBytes - 1 - i];
  }
  set_simd_register(t, xb_val_reversed);
  break;
}
#define VSPLT(type) \
uint32_t uim = instr->Bits(20, 16); \
int vrt = instr->RTValue(); \
......
......@@ -321,6 +321,18 @@ class Simulator : public SimulatorBase {
}
}
// Byte-reverse a 128-bit value: each 64-bit half is byte-swapped and the two
// halves trade places, so byte i of the input becomes byte 15 - i of the
// result.
static inline __uint128_t __builtin_bswap128(__uint128_t v) {
  const uint64_t low_half = static_cast<uint64_t>(v);
  const uint64_t high_half = static_cast<uint64_t>(v >> 64);
  // The swapped low half becomes the new high half, and vice versa.
  const __uint128_t new_high = __builtin_bswap64(low_half);
  const __uint128_t new_low = __builtin_bswap64(high_half);
  return (new_high << 64) | new_low;
}
#define RW_VAR_LIST(V) \
V(QWU, unsigned __int128) \
V(QW, __int128) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment