Commit 3bf2935f authored by Frank Emrich, committed by Commit Bot

[csa] Make CTZ, CLZ, and POPCOUNT available in CSA

This CL makes CTZ (count trailing zeros) and POPCOUNT (count set bits),
which are optional ops in the raw machine assembler, available in CSA.
A fallback exists for the case that they are not available.

This CL also makes the 64-bit version of the mandatory CLZ (count
leading zeros) op available.

Change-Id: I53cd6738b8ede8bd5842a83bb1161299824d39c9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2742207
Reviewed-by: Nico Hartmann <nicohartmann@chromium.org>
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Commit-Queue: Frank Emrich <emrich@google.com>
Cr-Commit-Position: refs/heads/master@{#73541}
parent cc09f7ff
......@@ -549,6 +549,130 @@ TNode<Float64T> CodeStubAssembler::Float64Trunc(TNode<Float64T> x) {
return var_x.value();
}
TNode<IntPtrT> CodeStubAssembler::PopulationCountFallback(
    TNode<UintPtrT> value) {
  // Software popcount, mirroring the slow path of base::bits::CountPopulation;
  // the {...} comments below show the corresponding C++ from there.
  // Divide-and-conquer bit counting (see "Hacker's Delight" by Henry S.
  // Warren, Jr., chapter 5-1): repeatedly sum adjacent bit "buckets" of
  // doubling width until a single bucket holds the total.
  constexpr uintptr_t kMasks[] = {static_cast<uintptr_t>(0x5555555555555555),
                                  static_cast<uintptr_t>(0x3333333333333333),
                                  static_cast<uintptr_t>(0x0f0f0f0f0f0f0f0f)};

  // 64 (or 32) buckets of 1 bit each, holding values from [0,1].
  // {value = ((value >> 1) & mask[0]) + (value & mask[0])}
  value = UintPtrAdd(
      WordAnd(WordShr(value, UintPtrConstant(1)), UintPtrConstant(kMasks[0])),
      WordAnd(value, UintPtrConstant(kMasks[0])));

  // Now 32 buckets of 2 bits, holding values from [0,2].
  // {value = ((value >> 2) & mask[1]) + (value & mask[1])}
  value = UintPtrAdd(
      WordAnd(WordShr(value, UintPtrConstant(2)), UintPtrConstant(kMasks[1])),
      WordAnd(value, UintPtrConstant(kMasks[1])));

  // Now 16 buckets of 4 bits, holding values from [0,4].
  // {value = ((value >> 4) & mask[2]) + (value & mask[2])}
  value = UintPtrAdd(
      WordAnd(WordShr(value, UintPtrConstant(4)), UintPtrConstant(kMasks[2])),
      WordAnd(value, UintPtrConstant(kMasks[2])));

  // Now 8 buckets of 8 bits, holding values from [0,8]. From this point on,
  // each bucket is wider than both the bits needed to represent the running
  // counts and the maximum possible result, so sums can no longer overflow
  // into a neighboring bucket and masking is unnecessary.
  // {value = (value >> 8) + value}
  value = UintPtrAdd(WordShr(value, UintPtrConstant(8)), value);

  // Now 4 buckets of 16 bits, holding values from [0,16].
  // {value = (value >> 16) + value}
  value = UintPtrAdd(WordShr(value, UintPtrConstant(16)), value);

  if (Is64()) {
    // Now 2 buckets of 32 bits, holding values from [0,32].
    // {value = (value >> 32) + value}
    value = UintPtrAdd(WordShr(value, UintPtrConstant(32)), value);
  }

  // A single bucket of sizeof(intptr_t) bits, holding a value from [0,64].
  // {return static_cast<unsigned>(value & 0xff)}
  return Signed(WordAnd(value, UintPtrConstant(0xff)));
}
TNode<Int64T> CodeStubAssembler::Word64PopulationCount(TNode<Word64T> value) {
  // Prefer the hardware instruction where the target provides one.
  if (IsWord64PopcntSupported()) return Word64Popcnt(value);

  if (Is32()) {
    // The software fallback operates on word-sized values, so a 64-bit
    // popcount cannot be emulated on 32-bit platforms.
    UNREACHABLE();
  }

  return ReinterpretCast<Int64T>(
      PopulationCountFallback(ReinterpretCast<UintPtrT>(value)));
}
TNode<Int32T> CodeStubAssembler::Word32PopulationCount(TNode<Word32T> value) {
  // Prefer the hardware instruction where the target provides one.
  if (IsWord32PopcntSupported()) return Word32Popcnt(value);

  if (Is32()) {
    // The input already has word size; count directly.
    return ReinterpretCast<Int32T>(
        PopulationCountFallback(ReinterpretCast<UintPtrT>(value)));
  }

  // On 64-bit platforms, zero-extend to word size first; the added high bits
  // are all zero and do not affect the count.
  TNode<IntPtrT> count = PopulationCountFallback(
      ReinterpretCast<UintPtrT>(ChangeUint32ToUint64(value)));
  return TruncateInt64ToInt32(ReinterpretCast<Int64T>(count));
}
TNode<Int64T> CodeStubAssembler::Word64CountTrailingZeros(
    TNode<Word64T> value) {
  // Prefer the hardware instruction where the target provides one.
  if (IsWord64CtzSupported()) return Word64Ctz(value);

  if (Is32()) {
    // Unsupported on 32-bit platforms.
    UNREACHABLE();
  }

  // Same fallback as in base::bits::CountTrailingZeros: reduce ctz to
  // popcount (see "Hacker's Delight" by Henry S. Warren, Jr., chapter 5-4).
  // ~value & (value - 1) has set exactly the bits below the lowest set bit
  // of value, so its population count equals the number of trailing zeros.
  // On x64 this is faster than counting in a loop or doing binary search.
  TNode<Word64T> below_lowest_set_bit = Word64And(
      Word64Not(value), Uint64Sub(Unsigned(value), Uint64Constant(1)));
  return Word64PopulationCount(below_lowest_set_bit);
}
TNode<Int32T> CodeStubAssembler::Word32CountTrailingZeros(
    TNode<Word32T> value) {
  // Counts the trailing zero bits of {value}. A zero input yields 32, the
  // same convention as the hardware Word32Ctz path.
  if (IsWord32CtzSupported()) {
    return Word32Ctz(value);
  }
  if (Is32()) {
    // Same fallback as in Word64CountTrailingZeros:
    // popcount(~value & (value - 1)) counts exactly the bits below the
    // lowest set bit.
    TNode<Word32T> lhs = Word32BitwiseNot(value);
    TNode<Word32T> rhs = Int32Sub(Signed(value), Int32Constant(1));
    return Word32PopulationCount(Word32And(lhs, rhs));
  } else {
    // Delegate to the 64-bit version. Setting bit 32 in the zero-extended
    // input caps the result at 32: without it, a zero input would yield 64
    // here but 32 on targets where Word32Ctz is supported. Non-zero inputs
    // are unaffected, since their lowest set bit is below bit 32.
    TNode<Word64T> value64 = Word64Or(ChangeUint32ToUint64(value),
                                      Uint64Constant(uint64_t{1} << 32));
    TNode<Int64T> res64 = Word64CountTrailingZeros(value64);
    return TruncateInt64ToInt32(res64);
  }
}
template <>
TNode<Smi> CodeStubAssembler::TaggedToParameter(TNode<Smi> value) {
return value;
......
......@@ -540,6 +540,12 @@ class V8_EXPORT_PRIVATE CodeStubAssembler
bool TryGetIntPtrOrSmiConstantValue(TNode<IntPtrT> maybe_constant,
int* value);
TNode<IntPtrT> PopulationCountFallback(TNode<UintPtrT> value);
TNode<Int64T> Word64PopulationCount(TNode<Word64T> value);
TNode<Int32T> Word32PopulationCount(TNode<Word32T> value);
TNode<Int64T> Word64CountTrailingZeros(TNode<Word64T> value);
TNode<Int32T> Word32CountTrailingZeros(TNode<Word32T> value);
// Round the 32bits payload of the provided word up to the next power of two.
TNode<IntPtrT> IntPtrRoundUpToPowerOfTwo32(TNode<IntPtrT> value);
// Select the maximum of the two provided IntPtr values.
......
......@@ -225,6 +225,22 @@ bool CodeAssembler::IsIntPtrAbsWithOverflowSupported() const {
: IsInt32AbsWithOverflowSupported();
}
bool CodeAssembler::IsWord32PopcntSupported() const {
  // True when the target machine provides a native 32-bit popcount operator.
  auto* machine = raw_assembler()->machine();
  return machine->Word32Popcnt().IsSupported();
}
bool CodeAssembler::IsWord64PopcntSupported() const {
  // True when the target machine provides a native 64-bit popcount operator.
  auto* machine = raw_assembler()->machine();
  return machine->Word64Popcnt().IsSupported();
}
bool CodeAssembler::IsWord32CtzSupported() const {
  // True when the target machine provides a native 32-bit count-trailing-zeros
  // operator.
  auto* machine = raw_assembler()->machine();
  return machine->Word32Ctz().IsSupported();
}
bool CodeAssembler::IsWord64CtzSupported() const {
  // True when the target machine provides a native 64-bit count-trailing-zeros
  // operator.
  auto* machine = raw_assembler()->machine();
  return machine->Word64Ctz().IsSupported();
}
#ifdef DEBUG
void CodeAssembler::GenerateCheckMaybeObjectIsObject(TNode<MaybeObject> node,
const char* location) {
......
......@@ -366,6 +366,11 @@ TNode<Float64T> Float64Add(TNode<Float64T> a, TNode<Float64T> b);
V(Float64RoundTiesEven, Float64T, Float64T) \
V(Float64RoundTruncate, Float64T, Float64T) \
V(Word32Clz, Int32T, Word32T) \
V(Word64Clz, Int64T, Word64T) \
V(Word32Ctz, Int32T, Word32T) \
V(Word64Ctz, Int64T, Word64T) \
V(Word32Popcnt, Int32T, Word32T) \
V(Word64Popcnt, Int64T, Word64T) \
V(Word32BitwiseNot, Word32T, Word32T) \
V(WordNot, WordT, WordT) \
V(Word64Not, Word64T, Word64T) \
......@@ -415,6 +420,10 @@ class V8_EXPORT_PRIVATE CodeAssembler {
bool IsInt32AbsWithOverflowSupported() const;
bool IsInt64AbsWithOverflowSupported() const;
bool IsIntPtrAbsWithOverflowSupported() const;
bool IsWord32PopcntSupported() const;
bool IsWord64PopcntSupported() const;
bool IsWord32CtzSupported() const;
bool IsWord64CtzSupported() const;
// Shortened aliases for use in CodeAssembler subclasses.
using Label = CodeAssemblerLabel;
......
......@@ -270,6 +270,7 @@ class MachineRepresentationInferrer {
case IrOpcode::kRoundFloat64ToInt32:
case IrOpcode::kFloat64ExtractLowWord32:
case IrOpcode::kFloat64ExtractHighWord32:
case IrOpcode::kWord32Popcnt:
MACHINE_UNOP_32_LIST(LABEL)
MACHINE_BINOP_32_LIST(LABEL) {
representation_vector_[node->id()] =
......@@ -283,6 +284,9 @@ class MachineRepresentationInferrer {
case IrOpcode::kBitcastFloat64ToInt64:
case IrOpcode::kChangeFloat64ToInt64:
case IrOpcode::kChangeFloat64ToUint64:
case IrOpcode::kWord64Popcnt:
case IrOpcode::kWord64Ctz:
case IrOpcode::kWord64Clz:
MACHINE_BINOP_64_LIST(LABEL) {
representation_vector_[node->id()] =
MachineRepresentation::kWord64;
......@@ -376,6 +380,9 @@ class MachineRepresentationChecker {
case IrOpcode::kRoundInt64ToFloat32:
case IrOpcode::kRoundUint64ToFloat32:
case IrOpcode::kTruncateInt64ToInt32:
case IrOpcode::kWord64Ctz:
case IrOpcode::kWord64Clz:
case IrOpcode::kWord64Popcnt:
CheckValueInputForInt64Op(node, 0);
break;
case IrOpcode::kBitcastWordToTagged:
......@@ -461,6 +468,7 @@ class MachineRepresentationChecker {
case IrOpcode::kBitcastWord32ToWord64:
case IrOpcode::kChangeInt32ToInt64:
case IrOpcode::kChangeUint32ToUint64:
case IrOpcode::kWord32Popcnt:
MACHINE_UNOP_32_LIST(LABEL) { CheckValueInputForInt32Op(node, 0); }
break;
case IrOpcode::kWord32Equal:
......
......@@ -539,6 +539,14 @@ class V8_EXPORT_PRIVATE RawMachineAssembler {
// Arithmetic right shift of a value represented as a {low_word, high_word}
// pair of 32-bit words — presumably used to emulate 64-bit shifts on 32-bit
// targets; confirm against MachineOperatorBuilder::Word32PairSar.
Node* Word32PairSar(Node* low_word, Node* high_word, Node* shift) {
  return AddNode(machine()->Word32PairSar(), low_word, high_word, shift);
}
// Counts the set bits of a 32-bit value. Word32Popcnt is an optional machine
// operator (note the .op() unwrap); callers should presumably verify support
// first (cf. CodeAssembler::IsWord32PopcntSupported) — TODO confirm.
Node* Word32Popcnt(Node* a) {
  return AddNode(machine()->Word32Popcnt().op(), a);
}
// Counts the set bits of a 64-bit value. Word64Popcnt is an optional machine
// operator (note the .op() unwrap); callers should presumably verify support
// first (cf. CodeAssembler::IsWord64PopcntSupported) — TODO confirm.
Node* Word64Popcnt(Node* a) {
  return AddNode(machine()->Word64Popcnt().op(), a);
}
// Counts trailing zero bits of a 32-bit value; optional machine operator —
// availability should presumably be checked by the caller (TODO confirm).
Node* Word32Ctz(Node* a) { return AddNode(machine()->Word32Ctz().op(), a); }
// Counts trailing zero bits of a 64-bit value; optional machine operator —
// availability should presumably be checked by the caller (TODO confirm).
Node* Word64Ctz(Node* a) { return AddNode(machine()->Word64Ctz().op(), a); }
Node* StackPointerGreaterThan(Node* value) {
return AddNode(
machine()->StackPointerGreaterThan(StackCheckKind::kCodeStubAssembler),
......
......@@ -4221,6 +4221,86 @@ TEST(SmiUntagComparisonOptimization) {
FunctionTester ft(asm_tester.GenerateCode(options), kNumParams);
}
TEST(PopCount) {
  Isolate* isolate(CcTest::InitIsolateOnce());
  CodeAssemblerTester asm_tester(isolate);
  CodeStubAssembler m(asm_tester.state());

  // Pairs of {input, expected population count}.
  const std::vector<std::pair<uint32_t, int>> test_cases = {
      {0, 0},
      {1, 1},
      // Use an unsigned literal: (1 << 31) shifts a signed int into the sign
      // bit, which is undefined behavior before C++20.
      {(uint32_t{1} << 31), 1},
      {0b01010101010101010101010101010101, 16},
      {0b10101010101010101010101010101010, 16},
      {0b11100011100000011100011111000111, 17}  // arbitrarily chosen
  };

  for (const std::pair<uint32_t, int>& test_case : test_cases) {
    uint32_t value32 = test_case.first;
    // Duplicate the 32-bit pattern into both halves of the 64-bit input, so
    // the expected 64-bit count is exactly twice the 32-bit one.
    uint64_t value64 = (static_cast<uint64_t>(value32) << 32) | value32;
    int expected_pop32 = test_case.second;
    int expected_pop64 = 2 * expected_pop32;

    TNode<Int32T> pop32 = m.Word32PopulationCount(m.Uint32Constant(value32));
    CSA_CHECK(&m, m.Word32Equal(pop32, m.Int32Constant(expected_pop32)));

    if (m.Is64()) {
      // TODO(emrich): enable once 64-bit operations are supported on 32-bit
      // architectures.
      TNode<Int64T> pop64 = m.Word64PopulationCount(m.Uint64Constant(value64));
      CSA_CHECK(&m, m.Word64Equal(pop64, m.Int64Constant(expected_pop64)));
    }
  }
  m.Return(m.UndefinedConstant());

  FunctionTester ft(asm_tester.GenerateCode());
  ft.Call();
}
TEST(CountTrailingZeros) {
  Isolate* isolate(CcTest::InitIsolateOnce());
  CodeAssemblerTester asm_tester(isolate);
  CodeStubAssembler m(asm_tester.state());

  // Pairs of {input, expected count of trailing zero bits}.
  const std::vector<std::pair<uint32_t, int>> test_cases = {
      {1, 0},
      {2, 1},
      {(0b0101010'0000'0000), 9},
      // Use an unsigned literal: (1 << 31) shifts a signed int into the sign
      // bit, which is undefined behavior before C++20.
      {(uint32_t{1} << 31), 31},
      {std::numeric_limits<uint32_t>::max(), 0},
  };

  for (const std::pair<uint32_t, int>& test_case : test_cases) {
    uint32_t value32 = test_case.first;
    // Shifting the pattern into the upper half adds exactly 32 trailing
    // zeros.
    uint64_t value64 = static_cast<uint64_t>(value32) << 32;
    int expected_ctz32 = test_case.second;
    int expected_ctz64 = expected_ctz32 + 32;

    TNode<Int32T> pop32 = m.Word32CountTrailingZeros(m.Uint32Constant(value32));
    CSA_CHECK(&m, m.Word32Equal(pop32, m.Int32Constant(expected_ctz32)));

    if (m.Is64()) {
      // TODO(emrich): enable once 64-bit operations are supported on 32-bit
      // architectures.
      // The zero-extended input must produce the same count as the 32-bit op.
      TNode<Int64T> pop64_ext =
          m.Word64CountTrailingZeros(m.Uint64Constant(value32));
      TNode<Int64T> pop64 =
          m.Word64CountTrailingZeros(m.Uint64Constant(value64));
      CSA_CHECK(&m, m.Word64Equal(pop64_ext, m.Int64Constant(expected_ctz32)));
      CSA_CHECK(&m, m.Word64Equal(pop64, m.Int64Constant(expected_ctz64)));
    }
  }
  m.Return(m.UndefinedConstant());

  FunctionTester ft(asm_tester.GenerateCode());
  ft.Call();
}
} // namespace compiler
} // namespace internal
} // namespace v8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment