Commit 7d231e57 authored by Clemens Hammacher, committed by Commit Bot

[bits] Consolidate Count{Leading,Trailing}Zeros

Instead of having one method for 32 bit integers and one for 64 bit,
plus a templatized version to choose from those two, just implement one
version which handles unsigned integers of any size. Also, make them
constexpr.
The Count{Leading,Trailing}Zeros{32,64} methods are kept for now in
order to keep the amount of code changes small. Also, sometimes it
improves readability by stating exactly the size of the argument,
especially for leading zeros (where zero-extending would add more
leading zeros).

CountLeadingZeros now uses a binary search inspired implementation
as proposed in Hacker's Delight. It's more than 20% faster on x64 if
the builtins are disabled.
CountTrailingZeros falls back to CountPopulation instead of counting in
a naive loop. This is ~50% faster.

R=mstarzinger@chromium.org

Change-Id: I1d8bf1d7295b930724163248150444bd17fbb34e
Reviewed-on: https://chromium-review.googlesource.com/741231
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#49106}
parent c35c40ae
......@@ -18,7 +18,7 @@ uint32_t RoundUpToPowerOfTwo32(uint32_t value) {
if (value) --value;
// Use computation based on leading zeros if we have compiler support for that.
#if V8_HAS_BUILTIN_CLZ || V8_CC_MSVC
return 1u << (32 - CountLeadingZeros32(value));
return 1u << (32 - CountLeadingZeros(value));
#else
value |= value >> 1;
value |= value >> 2;
......@@ -34,7 +34,7 @@ uint64_t RoundUpToPowerOfTwo64(uint64_t value) {
if (value) --value;
// Use computation based on leading zeros if we have compiler support for that.
#if V8_HAS_BUILTIN_CLZ
return uint64_t{1} << (64 - CountLeadingZeros64(value));
return uint64_t{1} << (64 - CountLeadingZeros(value));
#else
value |= value >> 1;
value |= value >> 2;
......
......@@ -27,33 +27,6 @@ class CheckedNumeric;
namespace bits {
// Define overloaded |Name| for |Name32| and |Name64|, depending on the size of
// the given value.
//
// The overloads are only defined for input types of size 4 and 8, respectively,
// using enable_if and SFINAE to disable them otherwise. enable_if<bool,
// typename> only has a "type" member if the first parameter is true, in which
// case "type" is a typedef to the second member (here, set to "unsigned").
// Otherwise, enable_if::type doesn't exist, making the function signature
// invalid, and so the entire function is thrown away (without an error) due to
// SFINAE.
//
// Note that we cannot simply check sizeof(T) using an if statement, as we need
// both branches of the if to be syntactically valid even if one of the branches
// is dead.
#define DEFINE_32_64_OVERLOADS(Name) \
template <typename T> \
inline typename std::enable_if<sizeof(T) == 4, unsigned>::type Name( \
T value) { \
return Name##32(value); \
} \
\
template <typename T> \
inline typename std::enable_if<sizeof(T) == 8, unsigned>::type Name( \
T value) { \
return Name##64(value); \
}
// CountPopulation(value) returns the number of bits set in |value|.
template <typename T>
constexpr inline
......@@ -80,44 +53,6 @@ constexpr inline
#endif
}
// CountLeadingZeros32(value) returns the number of zero bits following the most
// significant 1 bit in |value| if |value| is non-zero, otherwise it returns 32.
inline unsigned CountLeadingZeros32(uint32_t value) {
#if V8_HAS_BUILTIN_CLZ
return value ? __builtin_clz(value) : 32;
#elif V8_CC_MSVC
unsigned long result; // NOLINT(runtime/int)
if (!_BitScanReverse(&result, value)) return 32;
return static_cast<unsigned>(31 - result);
#else
value = value | (value >> 1);
value = value | (value >> 2);
value = value | (value >> 4);
value = value | (value >> 8);
value = value | (value >> 16);
return CountPopulation(~value);
#endif
}
// CountLeadingZeros64(value) returns the number of zero bits following the most
// significant 1 bit in |value| if |value| is non-zero, otherwise it returns 64.
inline unsigned CountLeadingZeros64(uint64_t value) {
#if V8_HAS_BUILTIN_CLZ
return value ? __builtin_clzll(value) : 64;
#else
value = value | (value >> 1);
value = value | (value >> 2);
value = value | (value >> 4);
value = value | (value >> 8);
value = value | (value >> 16);
value = value | (value >> 32);
return CountPopulation(~value);
#endif
}
DEFINE_32_64_OVERLOADS(CountLeadingZeros)
// ReverseBits(value) returns |value| in reverse bit order.
template <typename T>
T ReverseBits(T value) {
......@@ -131,42 +66,66 @@ T ReverseBits(T value) {
return result;
}
// CountTrailingZeros32(value) returns the number of zero bits preceding the
// least significant 1 bit in |value| if |value| is non-zero, otherwise it
// returns 32.
inline unsigned CountTrailingZeros32(uint32_t value) {
#if V8_HAS_BUILTIN_CTZ
return value ? __builtin_ctz(value) : 32;
#elif V8_CC_MSVC
unsigned long result; // NOLINT(runtime/int)
if (!_BitScanForward(&result, value)) return 32;
return static_cast<unsigned>(result);
// CountLeadingZeros(value) returns the number of zero bits following the most
// significant 1 bit in |value| if |value| is non-zero, otherwise it returns
// {sizeof(T) * 8}.
template <typename T, unsigned bits = sizeof(T) * 8>
inline constexpr
typename std::enable_if<std::is_unsigned<T>::value && sizeof(T) <= 8,
unsigned>::type
CountLeadingZeros(T value) {
#if V8_HAS_BUILTIN_CLZ
return value == 0
? bits
: bits == 64
? __builtin_clzll(static_cast<uint64_t>(value))
: __builtin_clz(static_cast<uint32_t>(value)) - (32 - bits);
#else
if (value == 0) return 32;
unsigned count = 0;
for (value ^= value - 1; value >>= 1; ++count) {
}
return count;
// Binary search algorithm taken from "Hacker's Delight" (by Henry S. Warren,
// Jr.), figures 5-11 and 5-12.
if (bits == 1) return static_cast<unsigned>(value) ^ 1;
T upper_half = value >> (bits / 2);
T next_value = upper_half != 0 ? upper_half : value;
unsigned add = upper_half != 0 ? 0 : bits / 2;
return CountLeadingZeros<T, bits / 2>(next_value) + add;
#endif
}
// Fixed-width convenience wrapper around the generic CountLeadingZeros.
// Kept (per the commit message) to limit churn at existing call sites and
// because spelling the operand width aids readability for leading-zero
// counts, where zero-extension would change the result.
inline constexpr unsigned CountLeadingZeros32(uint32_t value) {
return CountLeadingZeros(value);
}
// Fixed-width (64-bit) convenience wrapper around the generic
// CountLeadingZeros; retained so callers can state the operand width
// explicitly.
inline constexpr unsigned CountLeadingZeros64(uint64_t value) {
return CountLeadingZeros(value);
}
// CountTrailingZeros64(value) returns the number of zero bits preceding the
// CountTrailingZeros(value) returns the number of zero bits preceding the
// least significant 1 bit in |value| if |value| is non-zero, otherwise it
// returns 64.
inline unsigned CountTrailingZeros64(uint64_t value) {
// returns {sizeof(T) * 8}.
template <typename T, unsigned bits = sizeof(T) * 8>
inline constexpr
typename std::enable_if<std::is_integral<T>::value && sizeof(T) <= 8,
unsigned>::type
CountTrailingZeros(T value) {
#if V8_HAS_BUILTIN_CTZ
return value ? __builtin_ctzll(value) : 64;
return value == 0 ? bits
: bits == 64 ? __builtin_ctzll(static_cast<uint64_t>(value))
: __builtin_ctz(static_cast<uint32_t>(value));
#else
if (value == 0) return 64;
unsigned count = 0;
for (value ^= value - 1; value >>= 1; ++count) {
}
return count;
// Fall back to popcount (see "Hacker's Delight" by Henry S. Warren, Jr.),
// chapter 5-4. On x64, this is faster than counting in a loop and faster
// than doing binary search.
using U = typename std::make_unsigned<T>::type;
U u = value;
return CountPopulation(static_cast<U>(~u & (u - 1u)));
#endif
}
DEFINE_32_64_OVERLOADS(CountTrailingZeros)
// Fixed-width convenience wrapper around the generic CountTrailingZeros;
// retained to keep existing 32-bit call sites unchanged.
inline constexpr unsigned CountTrailingZeros32(uint32_t value) {
return CountTrailingZeros(value);
}
// Fixed-width (64-bit) convenience wrapper around the generic
// CountTrailingZeros; retained to keep existing 64-bit call sites unchanged.
inline constexpr unsigned CountTrailingZeros64(uint64_t value) {
return CountTrailingZeros(value);
}
// Returns true iff |value| is a power of 2.
template <typename T,
......@@ -337,8 +296,6 @@ V8_BASE_EXPORT int64_t SignedSaturatedAdd64(int64_t lhs, int64_t rhs);
// checks and returns the result.
V8_BASE_EXPORT int64_t SignedSaturatedSub64(int64_t lhs, int64_t rhs);
#undef DEFINE_32_64_OVERLOADS
} // namespace bits
} // namespace base
} // namespace v8
......
......@@ -121,7 +121,7 @@ Node* MachineOperatorReducer::Uint32Div(Node* dividend, uint32_t divisor) {
DCHECK_LT(0u, divisor);
// If the divisor is even, we can avoid using the expensive fixup by shifting
// the dividend upfront.
unsigned const shift = base::bits::CountTrailingZeros32(divisor);
unsigned const shift = base::bits::CountTrailingZeros(divisor);
dividend = Word32Shr(dividend, shift);
divisor >>= shift;
// Compute the magic number for the (shifted) divisor.
......@@ -1181,7 +1181,7 @@ Reduction MachineOperatorReducer::ReduceWord32And(Node* node) {
Uint32BinopMatcher mleft(m.left().node());
if (mleft.right().HasValue() &&
(mleft.right().Value() & 0x1f) >=
base::bits::CountTrailingZeros32(mask)) {
base::bits::CountTrailingZeros(mask)) {
// (x << L) & (-1 << K) => x << L iff L >= K
return Replace(mleft.node());
}
......@@ -1222,7 +1222,7 @@ Reduction MachineOperatorReducer::ReduceWord32And(Node* node) {
}
if (mleft.left().IsWord32Shl()) {
Int32BinopMatcher mleftleft(mleft.left().node());
if (mleftleft.right().Is(base::bits::CountTrailingZeros32(mask))) {
if (mleftleft.right().Is(base::bits::CountTrailingZeros(mask))) {
// (y << L + x) & (-1 << L) => (x & (-1 << L)) + y << L
node->ReplaceInput(0,
Word32And(mleft.right().node(), m.right().node()));
......@@ -1234,7 +1234,7 @@ Reduction MachineOperatorReducer::ReduceWord32And(Node* node) {
}
if (mleft.right().IsWord32Shl()) {
Int32BinopMatcher mleftright(mleft.right().node());
if (mleftright.right().Is(base::bits::CountTrailingZeros32(mask))) {
if (mleftright.right().Is(base::bits::CountTrailingZeros(mask))) {
// (x + y << L) & (-1 << L) => (x & (-1 << L)) + y << L
node->ReplaceInput(0,
Word32And(mleft.left().node(), m.right().node()));
......
......@@ -402,7 +402,7 @@ void LiveObjectRange<mode>::iterator::AdvanceToNextValidObject() {
HeapObject* object = nullptr;
int size = 0;
while (current_cell_ != 0) {
uint32_t trailing_zeros = base::bits::CountTrailingZeros32(current_cell_);
uint32_t trailing_zeros = base::bits::CountTrailingZeros(current_cell_);
Address addr = cell_base_ + trailing_zeros * kPointerSize;
// Clear the first bit of the found object..
......
......@@ -200,7 +200,7 @@ class SlotSet : public Malloced {
uint32_t old_cell = cell;
uint32_t mask = 0;
while (cell) {
int bit_offset = base::bits::CountTrailingZeros32(cell);
int bit_offset = base::bits::CountTrailingZeros(cell);
uint32_t bit_mask = 1u << bit_offset;
uint32_t slot = (cell_offset + bit_offset) << kPointerSizeLog2;
if (callback(page_start_ + slot) == KEEP_SLOT) {
......
......@@ -10,7 +10,7 @@
#include "src/handles-inl.h"
#include "src/objects-inl.h"
using v8::base::bits::CountTrailingZeros32;
using v8::base::bits::CountTrailingZeros;
namespace v8 {
namespace internal {
......@@ -144,7 +144,7 @@ bool LayoutDescriptor::IsTagged(int field_index, int max_sequence_length,
bool is_tagged = (value & layout_mask) == 0;
if (!is_tagged) value = ~value; // Count set bits instead of cleared bits.
value = value & ~(layout_mask - 1); // Clear bits we are not interested in.
int sequence_length = CountTrailingZeros32(value) - layout_bit_index;
int sequence_length = CountTrailingZeros(value) - layout_bit_index;
if (layout_bit_index + sequence_length == kBitsPerLayoutWord) {
// This is a contiguous sequence till the end of current word, proceed
......@@ -157,7 +157,7 @@ bool LayoutDescriptor::IsTagged(int field_index, int max_sequence_length,
bool cur_is_tagged = (value & 1) == 0;
if (cur_is_tagged != is_tagged) break;
if (!is_tagged) value = ~value; // Count set bits instead.
int cur_sequence_length = CountTrailingZeros32(value);
int cur_sequence_length = CountTrailingZeros(value);
sequence_length += cur_sequence_length;
if (sequence_length >= max_sequence_length) break;
if (cur_sequence_length != kBitsPerLayoutWord) break;
......
......@@ -1414,7 +1414,7 @@ MaybeHandle<String> BigInt::ToStringBasePowerOfTwo(Handle<BigInt> x,
const int length = x->length();
const bool sign = x->sign();
const int bits_per_char = base::bits::CountTrailingZeros32(radix);
const int bits_per_char = base::bits::CountTrailingZeros(radix);
const int char_mask = radix - 1;
// Compute the length of the resulting string: divide the bit length of the
// BigInt by the number of bits representable per character (rounding up).
......
......@@ -154,11 +154,11 @@ RUNTIME_FUNCTION(Runtime_SmiLexicographicCompare) {
// integer comes first in the lexicographic order.
// From http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
int x_log2 = 31 - base::bits::CountLeadingZeros32(x_scaled);
int x_log2 = 31 - base::bits::CountLeadingZeros(x_scaled);
int x_log10 = ((x_log2 + 1) * 1233) >> 12;
x_log10 -= x_scaled < kPowersOf10[x_log10];
int y_log2 = 31 - base::bits::CountLeadingZeros32(y_scaled);
int y_log2 = 31 - base::bits::CountLeadingZeros(y_scaled);
int y_log10 = ((y_log2 + 1) * 1233) >> 12;
y_log10 -= y_scaled < kPowersOf10[y_log10];
......
......@@ -134,7 +134,7 @@ class LiftoffAssembler : public TurboAssembler {
RegList available_regs =
kGpCacheRegs & ~used_registers & ~pinned_scope.pinned_regs();
Register reg =
Register::from_code(base::bits::CountTrailingZeros64(available_regs));
Register::from_code(base::bits::CountTrailingZeros(available_regs));
DCHECK_EQ(0, used_registers & reg.bit());
return reg;
}
......@@ -176,7 +176,7 @@ class LiftoffAssembler : public TurboAssembler {
RegList remaining_regs = unpinned_regs & ~mask;
if (!remaining_regs) remaining_regs = unpinned_regs;
last_spilled_reg =
Register::from_code(base::bits::CountTrailingZeros64(remaining_regs));
Register::from_code(base::bits::CountTrailingZeros(remaining_regs));
return last_spilled_reg;
}
};
......
......@@ -201,12 +201,12 @@ int32_t uint64_mod_wrapper(uint64_t* dst, uint64_t* src) {
}
uint32_t word32_ctz_wrapper(uint32_t* input) {
return static_cast<uint32_t>(base::bits::CountTrailingZeros32(*input));
return static_cast<uint32_t>(base::bits::CountTrailingZeros(*input));
}
uint32_t word64_ctz_wrapper(uint64_t* input) {
return static_cast<uint32_t>(
base::bits::CountTrailingZeros64(ReadUnalignedValue<uint64_t>(input)));
base::bits::CountTrailingZeros(ReadUnalignedValue<uint64_t>(input)));
}
uint32_t word32_popcnt_wrapper(uint32_t* input) {
......
......@@ -368,11 +368,11 @@ inline uint32_t ExecuteI32AsmjsUConvertF64(double a, TrapReason* trap) {
}
int32_t ExecuteI32Clz(uint32_t val, TrapReason* trap) {
return base::bits::CountLeadingZeros32(val);
return base::bits::CountLeadingZeros(val);
}
uint32_t ExecuteI32Ctz(uint32_t val, TrapReason* trap) {
return base::bits::CountTrailingZeros32(val);
return base::bits::CountTrailingZeros(val);
}
uint32_t ExecuteI32Popcnt(uint32_t val, TrapReason* trap) {
......@@ -384,11 +384,11 @@ inline uint32_t ExecuteI32Eqz(uint32_t val, TrapReason* trap) {
}
int64_t ExecuteI64Clz(uint64_t val, TrapReason* trap) {
return base::bits::CountLeadingZeros64(val);
return base::bits::CountLeadingZeros(val);
}
inline uint64_t ExecuteI64Ctz(uint64_t val, TrapReason* trap) {
return base::bits::CountTrailingZeros64(val);
return base::bits::CountTrailingZeros(val);
}
inline int64_t ExecuteI64Popcnt(uint64_t val, TrapReason* trap) {
......
......@@ -1185,8 +1185,8 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) {
if (src == 0) {
Xorpd(dst, dst);
} else {
unsigned nlz = base::bits::CountLeadingZeros64(src);
unsigned ntz = base::bits::CountTrailingZeros64(src);
unsigned nlz = base::bits::CountLeadingZeros(src);
unsigned ntz = base::bits::CountTrailingZeros(src);
unsigned pop = base::bits::CountPopulation(src);
DCHECK_NE(0u, pop);
if (pop == 64) {
......
......@@ -18,6 +18,15 @@ namespace v8 {
namespace base {
namespace bits {
// Exercises the new templated CountPopulation on a 16-bit operand: zero, a
// single set bit, sparse/dense nibble patterns, and the all-ones value.
TEST(Bits, CountPopulation16) {
EXPECT_EQ(0u, CountPopulation(uint16_t{0}));
EXPECT_EQ(1u, CountPopulation(uint16_t{1}));
EXPECT_EQ(4u, CountPopulation(uint16_t{0x1111}));
EXPECT_EQ(8u, CountPopulation(uint16_t{0xf0f0}));
EXPECT_EQ(12u, CountPopulation(uint16_t{0xf0ff}));
EXPECT_EQ(16u, CountPopulation(uint16_t{0xffff}));
}
TEST(Bits, CountPopulation32) {
EXPECT_EQ(0u, CountPopulation(uint32_t{0}));
EXPECT_EQ(1u, CountPopulation(uint32_t{1}));
......@@ -27,7 +36,6 @@ TEST(Bits, CountPopulation32) {
EXPECT_EQ(32u, CountPopulation(uint32_t{0xffffffff}));
}
TEST(Bits, CountPopulation64) {
EXPECT_EQ(0u, CountPopulation(uint64_t{0}));
EXPECT_EQ(1u, CountPopulation(uint64_t{1}));
......@@ -42,46 +50,71 @@ TEST(Bits, CountPopulation64) {
EXPECT_EQ(64u, CountPopulation(uint64_t{0xffffffffffffffff}));
}
// Exercises the templated CountLeadingZeros on a 16-bit operand: the
// zero-input case (must return the full width, 16), every single-bit value
// via the traced shift range, and a multi-bit pattern.
TEST(Bits, CountLeadingZeros16) {
EXPECT_EQ(16u, CountLeadingZeros(uint16_t{0}));
EXPECT_EQ(15u, CountLeadingZeros(uint16_t{1}));
TRACED_FORRANGE(uint16_t, shift, 0, 15) {
EXPECT_EQ(15u - shift,
CountLeadingZeros(static_cast<uint16_t>(1 << shift)));
}
EXPECT_EQ(4u, CountLeadingZeros(uint16_t{0x0f0f}));
}
TEST(Bits, CountLeadingZeros32) {
EXPECT_EQ(32u, CountLeadingZeros32(0));
EXPECT_EQ(31u, CountLeadingZeros32(1));
EXPECT_EQ(32u, CountLeadingZeros(uint32_t{0}));
EXPECT_EQ(31u, CountLeadingZeros(uint32_t{1}));
TRACED_FORRANGE(uint32_t, shift, 0, 31) {
EXPECT_EQ(31u - shift, CountLeadingZeros32(1u << shift));
EXPECT_EQ(31u - shift, CountLeadingZeros(uint32_t{1} << shift));
}
EXPECT_EQ(4u, CountLeadingZeros32(0x0f0f0f0f));
EXPECT_EQ(4u, CountLeadingZeros(uint32_t{0x0f0f0f0f}));
}
TEST(Bits, CountLeadingZeros64) {
EXPECT_EQ(64u, CountLeadingZeros64(0));
EXPECT_EQ(63u, CountLeadingZeros64(1));
EXPECT_EQ(64u, CountLeadingZeros(uint64_t{0}));
EXPECT_EQ(63u, CountLeadingZeros(uint64_t{1}));
TRACED_FORRANGE(uint32_t, shift, 0, 63) {
EXPECT_EQ(63u - shift, CountLeadingZeros64(V8_UINT64_C(1) << shift));
EXPECT_EQ(63u - shift, CountLeadingZeros(uint64_t{1} << shift));
}
EXPECT_EQ(36u, CountLeadingZeros64(0x0f0f0f0f));
EXPECT_EQ(4u, CountLeadingZeros64(0x0f0f0f0f00000000));
EXPECT_EQ(36u, CountLeadingZeros(uint64_t{0x0f0f0f0f}));
EXPECT_EQ(4u, CountLeadingZeros(uint64_t{0x0f0f0f0f00000000}));
}
// Exercises the templated CountTrailingZeros on a 16-bit operand: the
// zero-input case (must return the full width, 16), the top bit, every
// single-bit value via the traced shift range, and a multi-bit pattern.
TEST(Bits, CountTrailingZeros16) {
EXPECT_EQ(16u, CountTrailingZeros(uint16_t{0}));
EXPECT_EQ(15u, CountTrailingZeros(uint16_t{0x8000}));
TRACED_FORRANGE(uint16_t, shift, 0, 15) {
EXPECT_EQ(shift, CountTrailingZeros(static_cast<uint16_t>(1 << shift)));
}
EXPECT_EQ(4u, CountTrailingZeros(uint16_t{0xf0f0u}));
}
TEST(Bits, CountTrailingZeros32) {
EXPECT_EQ(32u, CountTrailingZeros32(0));
EXPECT_EQ(31u, CountTrailingZeros32(0x80000000));
TEST(Bits, CountTrailingZerosu32) {
EXPECT_EQ(32u, CountTrailingZeros(uint32_t{0}));
EXPECT_EQ(31u, CountTrailingZeros(uint32_t{0x80000000}));
TRACED_FORRANGE(uint32_t, shift, 0, 31) {
EXPECT_EQ(shift, CountTrailingZeros32(1u << shift));
EXPECT_EQ(shift, CountTrailingZeros(uint32_t{1} << shift));
}
EXPECT_EQ(4u, CountTrailingZeros32(0xf0f0f0f0));
EXPECT_EQ(4u, CountTrailingZeros(uint32_t{0xf0f0f0f0u}));
}
// Exercises CountTrailingZeros on a *signed* 32-bit operand (the new
// template accepts any integral type and converts via make_unsigned in the
// fallback path). Negative inputs check that sign bits are handled: -4 is
// ...11100 (two trailing zeros) and -1 is all ones (zero trailing zeros).
TEST(Bits, CountTrailingZerosi32) {
EXPECT_EQ(32u, CountTrailingZeros(int32_t{0}));
TRACED_FORRANGE(uint32_t, shift, 0, 31) {
EXPECT_EQ(shift, CountTrailingZeros(int32_t{1} << shift));
}
EXPECT_EQ(4u, CountTrailingZeros(int32_t{0x70f0f0f0u}));
EXPECT_EQ(2u, CountTrailingZeros(int32_t{-4}));
EXPECT_EQ(0u, CountTrailingZeros(int32_t{-1}));
}
TEST(Bits, CountTrailingZeros64) {
EXPECT_EQ(64u, CountTrailingZeros64(0));
EXPECT_EQ(63u, CountTrailingZeros64(0x8000000000000000));
EXPECT_EQ(64u, CountTrailingZeros(uint64_t{0}));
EXPECT_EQ(63u, CountTrailingZeros(uint64_t{0x8000000000000000}));
TRACED_FORRANGE(uint32_t, shift, 0, 63) {
EXPECT_EQ(shift, CountTrailingZeros64(V8_UINT64_C(1) << shift));
EXPECT_EQ(shift, CountTrailingZeros(uint64_t{1} << shift));
}
EXPECT_EQ(4u, CountTrailingZeros64(0xf0f0f0f0));
EXPECT_EQ(36u, CountTrailingZeros64(0xf0f0f0f000000000));
EXPECT_EQ(4u, CountTrailingZeros(uint64_t{0xf0f0f0f0}));
EXPECT_EQ(36u, CountTrailingZeros(uint64_t{0xf0f0f0f000000000}));
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment