Commit 0bb40066, authored by Milad Fa, committed by V8 LUCI CQ

PPC: Optimize count of trailing zeros on P8 and below

Change-Id: Iff669f6272e2a95d5150108d5a3a77e903afbeb9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3275568
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#77874}
parent 42036e7e
......@@ -3768,33 +3768,39 @@ void TurboAssembler::CountLeadingZerosU64(Register dst, Register src, RCBit r) {
cntlzd(dst, src, r);
}
// Emits a bit-by-bit fallback that counts the trailing zero bits of `src`
// into `dst`, looking at no more than |max_count| bits. `src` and `dst` are
// not macro parameters: they must be names in scope at the expansion site.
//
// Clobbers |scratch1|, |scratch2| and the count register (written by mtctr).
// The branch out of the loop tests cr0, so cr0 is presumably overwritten by
// andi as well -- confirm against the assembler's andi definition.
// If none of the examined bits is set, the loop runs |max_count| times and
// `dst` ends up equal to |max_count|.
//
// The macro declares the labels `loop` and `done`, so it can be expanded at
// most once per scope.
#define COUNT_TRAILING_ZEROES_SLOW(max_count, scratch1, scratch2) \
Label loop, done; \
li(scratch1, Operand(max_count)); \
mtctr(scratch1); /* CTR caps the loop at max_count iterations */ \
mr(scratch1, src); /* shift a copy; must happen before dst is zeroed */ \
li(dst, Operand::Zero()); \
bind(&loop); /* while ((src & 1) == 0) */ \
andi(scratch2, scratch1, Operand(1)); \
bne(&done, cr0); /* lowest remaining bit is set: stop counting */ \
srdi(scratch1, scratch1, Operand(1)); /* src >>= 1;*/ \
addi(dst, dst, Operand(1)); /* dst++ */ \
bdnz(&loop); /* decrement CTR, loop again while it is non-zero */ \
bind(&done);
// Writes the number of trailing zero bits in the low 32 bits of |src| to
// |dst| (32 when all of those bits are zero).
//
// With PPC_9_PLUS support this is a single cnttzw and |r| is forwarded to it.
// Otherwise the COUNT_TRAILING_ZEROES_SLOW loop is emitted, which:
//   - clobbers |scratch1|, |scratch2| and the count register;
//   - does not use |r|;
//   - copies |src| before zeroing |dst|, so |dst| may alias |src|.
// |scratch1| and |scratch2| must be distinct from each other and from |dst|.
//
// The fallback deliberately uses the caller-supplied scratch registers. It
// must not be preceded by any sequence that writes |dst|, because the loop
// reads |src| afterwards and |dst| may be the same register.
void TurboAssembler::CountTrailingZerosU32(Register dst, Register src,
                                           Register scratch1,
                                           Register scratch2, RCBit r) {
  if (CpuFeatures::IsSupported(PPC_9_PLUS)) {
    cnttzw(dst, src, r);
  } else {
    COUNT_TRAILING_ZEROES_SLOW(32, scratch1, scratch2);
  }
}
// Writes the number of trailing zero bits in the 64-bit value of |src| to
// |dst| (64 when |src| is zero).
//
// With PPC_9_PLUS support this is a single cnttzd and |r| is forwarded to it.
// Otherwise the COUNT_TRAILING_ZEROES_SLOW loop is emitted, which:
//   - clobbers |scratch1|, |scratch2| and the count register;
//   - does not use |r|;
//   - copies |src| before zeroing |dst|, so |dst| may alias |src|.
// |scratch1| and |scratch2| must be distinct from each other and from |dst|.
//
// The fallback deliberately uses the caller-supplied scratch registers. It
// must not be preceded by any sequence that writes |dst|, because the loop
// reads |src| afterwards and |dst| may be the same register.
void TurboAssembler::CountTrailingZerosU64(Register dst, Register src,
                                           Register scratch1,
                                           Register scratch2, RCBit r) {
  if (CpuFeatures::IsSupported(PPC_9_PLUS)) {
    cnttzd(dst, src, r);
  } else {
    COUNT_TRAILING_ZEROES_SLOW(64, scratch1, scratch2);
  }
}
#undef COUNT_TRAILING_ZEROES_SLOW
void TurboAssembler::ClearByteU64(Register dst, int byte_idx) {
CHECK(0 <= byte_idx && byte_idx <= 7);
......
......@@ -263,8 +263,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
// Count the leading zero bits of |src| into |dst|. |r| is passed through to
// the emitted instruction and defaults to LeaveRC.
// NOTE(review): the 64-bit variant emits cntlzd(dst, src, r); the 32-bit
// variant presumably emits the word-sized form -- confirm in the .cc file.
void CountLeadingZerosU32(Register dst, Register src, RCBit r = LeaveRC);
void CountLeadingZerosU64(Register dst, Register src, RCBit r = LeaveRC);
// Count the trailing zero bits of |src| into |dst| (32-bit and 64-bit
// variants). |scratch1| and |scratch2| default to ip and r0; |r| defaults to
// LeaveRC.
// NOTE(review): the scratch registers are presumably only needed when the
// CPU has no count-trailing-zeros instruction -- confirm in the .cc file.
//
// There is intentionally a single overload per width. Every parameter after
// |src| has a default, so an additional (dst, src, r) overload would make the
// two-argument call CountTrailingZerosU32(dst, src) ambiguous.
void CountTrailingZerosU32(Register dst, Register src, Register scratch1 = ip,
                           Register scratch2 = r0, RCBit r = LeaveRC);
void CountTrailingZerosU64(Register dst, Register src, Register scratch1 = ip,
                           Register scratch2 = r0, RCBit r = LeaveRC);
void ClearByteU64(Register dst, int byte_idx);
void ReverseBitsU64(Register dst, Register src, Register scratch1,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.