Commit 407922fa authored by Milad Fa, committed by V8 LUCI CQ

PPC [liftoff]: optimize unsigned byte reverse ops

This CL optimizes the ByteReverse 16/32/64 ops both on pre-Power10
hardware (PPC<10) and on PPC_10_PLUS.

A 32-bit sign extension is also added to `ByteRev32` in codegen.

Change-Id: I6379ac4222f3574ab226971546238142039fe977
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3298308
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/main@{#78048}
parent 50c808d4
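
Editor's note: the three ops touched by this CL each reverse the byte order of an
unsigned 16-, 32-, or 64-bit value held in a register. A minimal, portable C++
sketch of the intended semantics follows; the function names are illustrative
only and are not V8 APIs.

#include <cstdint>

// Swap the two low-order bytes of a value assumed to hold a 16-bit quantity;
// the result stays zero-extended, matching ZeroExtHalfWord() in the assembler.
uint32_t ByteReverse16(uint32_t x) {
  return ((x & 0xFFu) << 8) | ((x >> 8) & 0xFFu);
}

// Reverse all four bytes of a 32-bit value.
uint32_t ByteReverse32(uint32_t x) {
  return ((x & 0x000000FFu) << 24) | ((x & 0x0000FF00u) << 8) |
         ((x & 0x00FF0000u) >> 8) | ((x & 0xFF000000u) >> 24);
}

// Reverse all eight bytes of a 64-bit value by reversing each 32-bit half and
// swapping the halves.
uint64_t ByteReverse64(uint64_t x) {
  uint64_t lo = ByteReverse32(static_cast<uint32_t>(x));
  uint64_t hi = ByteReverse32(static_cast<uint32_t>(x >> 32));
  return (lo << 32) | hi;
}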
@@ -3579,21 +3579,37 @@ void TurboAssembler::SwapSimd128(MemOperand src, MemOperand dst,
   addi(sp, sp, Operand(2 * kSimd128Size));
 }
 
-void TurboAssembler::ByteReverseU16(Register dst, Register val) {
-  subi(sp, sp, Operand(kSystemPointerSize));
-  sth(val, MemOperand(sp));
-  lhbrx(dst, MemOperand(r0, sp));
-  addi(sp, sp, Operand(kSystemPointerSize));
+void TurboAssembler::ByteReverseU16(Register dst, Register val,
+                                    Register scratch) {
+  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
+    brh(dst, val);
+    ZeroExtHalfWord(dst, dst);
+    return;
+  }
+  rlwinm(scratch, val, 8, 16, 23);
+  rlwinm(dst, val, 24, 24, 31);
+  orx(dst, scratch, dst);
+  ZeroExtHalfWord(dst, dst);
 }
 
-void TurboAssembler::ByteReverseU32(Register dst, Register val) {
-  subi(sp, sp, Operand(kSystemPointerSize));
-  stw(val, MemOperand(sp));
-  lwbrx(dst, MemOperand(r0, sp));
-  addi(sp, sp, Operand(kSystemPointerSize));
+void TurboAssembler::ByteReverseU32(Register dst, Register val,
+                                    Register scratch) {
+  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
+    brw(dst, val);
+    ZeroExtWord32(dst, dst);
+    return;
+  }
+  rotlwi(scratch, val, 8);
+  rlwimi(scratch, val, 24, 0, 7);
+  rlwimi(scratch, val, 24, 16, 23);
+  ZeroExtWord32(dst, scratch);
 }
 
 void TurboAssembler::ByteReverseU64(Register dst, Register val) {
+  if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
+    brd(dst, val);
+    return;
+  }
   subi(sp, sp, Operand(kSystemPointerSize));
   std(val, MemOperand(sp));
   ldbrx(dst, MemOperand(r0, sp));
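
Editor's note: the pre-Power10 fallback above replaces the old store-then-lhbrx/lwbrx
round trip through the stack with pure register rotate-and-mask sequences, while
PPC_10_PLUS uses the dedicated brh/brw/brd byte-reverse instructions. Below is a
rough C++ sketch of what the three-instruction 32-bit fallback computes (rotlwi
rotates left by an immediate, rlwimi rotates and then inserts under a mask, with
mask bits numbered from the most significant bit); the helper names are
hypothetical, not V8 code.

#include <cstdint>

// Rotate a 32-bit value left by n (0 < n < 32).
static uint32_t RotL32(uint32_t x, unsigned n) {
  return (x << n) | (x >> (32 - n));
}

uint32_t ByteReverse32Fallback(uint32_t val) {
  uint32_t scratch = RotL32(val, 8);      // rotlwi scratch, val, 8
  scratch = (scratch & ~0xFF000000u) |    // rlwimi scratch, val, 24, 0, 7
            (RotL32(val, 24) & 0xFF000000u);
  scratch = (scratch & ~0x0000FF00u) |    // rlwimi scratch, val, 24, 16, 23
            (RotL32(val, 24) & 0x0000FF00u);
  return scratch;                         // ZeroExtWord32(dst, scratch)
}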
@@ -3826,7 +3842,7 @@ void TurboAssembler::ReverseBitsU64(Register dst, Register src,
 void TurboAssembler::ReverseBitsU32(Register dst, Register src,
                                     Register scratch1, Register scratch2) {
-  ByteReverseU32(dst, src);
+  ByteReverseU32(dst, src, scratch1);
   for (int i = 4; i < 8; i++) {
     ReverseBitsInSingleByteU64(dst, dst, scratch1, scratch2, i);
   }
@@ -612,8 +612,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
                    Simd128Register scratch);
   void SwapSimd128(MemOperand src, MemOperand dst, Simd128Register scratch);
-  void ByteReverseU16(Register dst, Register val);
-  void ByteReverseU32(Register dst, Register val);
+  void ByteReverseU16(Register dst, Register val, Register scratch);
+  void ByteReverseU32(Register dst, Register val, Register scratch);
   void ByteReverseU64(Register dst, Register val);
 
   // Before calling a C-function from generated code, align arguments on stack.
@@ -2075,6 +2075,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       Register temp1 = r0;
       if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
         __ brw(output, input);
+        __ extsw(output, output);
         break;
       }
       __ rotlwi(temp1, input, 8);
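
Editor's note: the added extsw sign-extends the byte-reversed word so the upper
32 bits of the 64-bit output register reflect bit 31 of the 32-bit result, which
is how 32-bit values are normally kept on PPC64; brw itself only produces the
reversed low word. A C++ illustration of the combined effect (for illustration
only; __builtin_bswap32 stands in for brw):

#include <cstdint>

int64_t ByteRev32WithSignExtension(uint32_t input) {
  uint32_t reversed = __builtin_bswap32(input);                  // brw   output, input
  return static_cast<int64_t>(static_cast<int32_t>(reversed));   // extsw output, output
}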
@@ -559,9 +559,9 @@ constexpr bool is_be = false;
     case StoreType::kI64Store16: { \
       auto op_func = [&](Register dst, Register lhs, Register rhs) { \
         if (is_be) { \
-          ByteReverseU16(dst, lhs); \
+          ByteReverseU16(dst, lhs, r0); \
           instr(dst, dst, rhs); \
-          ByteReverseU16(dst, dst); \
+          ByteReverseU16(dst, dst, r0); \
         } else { \
           instr(dst, lhs, rhs); \
         } \
@@ -573,9 +573,9 @@ constexpr bool is_be = false;
     case StoreType::kI64Store32: { \
       auto op_func = [&](Register dst, Register lhs, Register rhs) { \
         if (is_be) { \
-          ByteReverseU32(dst, lhs); \
+          ByteReverseU32(dst, lhs, r0); \
           instr(dst, dst, rhs); \
-          ByteReverseU32(dst, dst); \
+          ByteReverseU32(dst, dst, r0); \
         } else { \
           instr(dst, lhs, rhs); \
         } \
@@ -657,9 +657,9 @@ void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
     case StoreType::kI32Store16:
     case StoreType::kI64Store16: {
       if (is_be) {
-        ByteReverseU16(r0, value.gp());
+        ByteReverseU16(r0, value.gp(), ip);
         TurboAssembler::AtomicExchange<uint16_t>(dst, r0, result.gp());
-        ByteReverseU16(result.gp(), result.gp());
+        ByteReverseU16(result.gp(), result.gp(), ip);
       } else {
         TurboAssembler::AtomicExchange<uint16_t>(dst, value.gp(), result.gp());
       }
@@ -668,9 +668,9 @@ void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
     case StoreType::kI32Store:
     case StoreType::kI64Store32: {
       if (is_be) {
-        ByteReverseU32(r0, value.gp());
+        ByteReverseU32(r0, value.gp(), ip);
         TurboAssembler::AtomicExchange<uint32_t>(dst, r0, result.gp());
-        ByteReverseU32(result.gp(), result.gp());
+        ByteReverseU32(result.gp(), result.gp(), ip);
       } else {
         TurboAssembler::AtomicExchange<uint32_t>(dst, value.gp(), result.gp());
       }
@@ -719,11 +719,11 @@ void LiftoffAssembler::AtomicCompareExchange(
     case StoreType::kI64Store16: {
       if (is_be) {
         Push(new_value.gp(), expected.gp());
-        ByteReverseU16(new_value.gp(), new_value.gp());
-        ByteReverseU16(expected.gp(), expected.gp());
+        ByteReverseU16(new_value.gp(), new_value.gp(), r0);
+        ByteReverseU16(expected.gp(), expected.gp(), r0);
         TurboAssembler::AtomicCompareExchange<uint16_t>(
             dst, expected.gp(), new_value.gp(), result.gp(), r0);
-        ByteReverseU16(result.gp(), result.gp());
+        ByteReverseU16(result.gp(), result.gp(), r0);
         Pop(new_value.gp(), expected.gp());
       } else {
         TurboAssembler::AtomicCompareExchange<uint16_t>(
@@ -735,11 +735,11 @@ void LiftoffAssembler::AtomicCompareExchange(
     case StoreType::kI64Store32: {
       if (is_be) {
         Push(new_value.gp(), expected.gp());
-        ByteReverseU32(new_value.gp(), new_value.gp());
-        ByteReverseU32(expected.gp(), expected.gp());
+        ByteReverseU32(new_value.gp(), new_value.gp(), r0);
+        ByteReverseU32(expected.gp(), expected.gp(), r0);
         TurboAssembler::AtomicCompareExchange<uint32_t>(
             dst, expected.gp(), new_value.gp(), result.gp(), r0);
-        ByteReverseU32(result.gp(), result.gp());
+        ByteReverseU32(result.gp(), result.gp(), r0);
         Pop(new_value.gp(), expected.gp());
       } else {
         TurboAssembler::AtomicCompareExchange<uint32_t>(
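
Editor's note: the Liftoff changes above all follow one pattern. WebAssembly memory
is little-endian, so on a big-endian PPC host (is_be) operands are byte-reversed
into memory order before the atomic operation and the result is reversed back
afterwards; this CL merely threads the new scratch argument (r0 or ip) through
those calls. A schematic C++ sketch of the wrapping, using hypothetical names
rather than V8 APIs:

#include <atomic>
#include <cstdint>

// Exchange a little-endian 32-bit value in memory from a big-endian host.
uint32_t AtomicExchangeLE(std::atomic<uint32_t>* slot, uint32_t value,
                          bool is_be) {
  if (!is_be) return slot->exchange(value);
  uint32_t swapped = __builtin_bswap32(value);  // ByteReverseU32(r0, value.gp(), ip)
  uint32_t old = slot->exchange(swapped);       // AtomicExchange<uint32_t>(dst, r0, result.gp())
  return __builtin_bswap32(old);                // ByteReverseU32(result.gp(), result.gp(), ip)
}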