Commit 11de0762 authored by Clemens Backes's avatar Clemens Backes Committed by V8 LUCI CQ

[utils] Improve BitVector performance

Avoid most of the {is_inline()} checks by having a {data_begin_} pointer
which either points at the inline storage or at the zone-allocated
memory.
This replaces a dynamic branch by a memory indirection, which is
beneficial for big (non-inline) BitVectors. For small BitVectors we will
have to see what the bots say; the hypothesis is that a memory load is
still faster than a dynamic branch.

Apart from better performance, this change allows for simpler code in
many places, including the iterator implementation.

R=jkummerow@chromium.org

Bug: v8:13063
Change-Id: I1e28279d1a438598e0b8403a6a4078c2cd2a4c48
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3776685Reviewed-by: 's avatarJakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81902}
parent 0998bbe6
......@@ -4,6 +4,8 @@
#include "src/utils/bit-vector.h"
#include <numeric>
#include "src/base/bits.h"
#include "src/utils/utils.h"
......@@ -26,13 +28,10 @@ void BitVector::Print() const {
#endif
int BitVector::Count() const {
if (is_inline()) return base::bits::CountPopulation(data_.inline_);
int count = 0;
for (int i = 0; i < data_length_; i++) {
count += base::bits::CountPopulation(data_.ptr_[i]);
}
return count;
auto accumulate_popcnt = [](int cnt, uintptr_t word) -> int {
return cnt + base::bits::CountPopulation(word);
};
return std::accumulate(data_begin_, data_end_, 0, accumulate_popcnt);
}
} // namespace internal
......
......@@ -15,50 +15,49 @@ namespace internal {
class V8_EXPORT_PRIVATE BitVector : public ZoneObject {
public:
union DataStorage {
uintptr_t* ptr_; // valid if data_length_ > 1
uintptr_t inline_; // valid if data_length_ == 1
explicit DataStorage(uintptr_t value) : inline_(value) {}
};
// Iterator for the elements of this BitVector.
class Iterator {
public:
V8_EXPORT_PRIVATE inline void operator++() {
current_++;
// Skip zeroed words.
while (current_value_ == 0) {
current_index_++;
if (Done()) return;
DCHECK(!target_->is_inline());
current_value_ = target_->data_.ptr_[current_index_];
current_ = current_index_ << kDataBitShift;
int bit_in_word = current_index_ & (kDataBits - 1);
if (bit_in_word < kDataBits - 1) {
uintptr_t remaining_bits = *ptr_ >> (bit_in_word + 1);
if (remaining_bits) {
int next_bit_in_word = base::bits::CountTrailingZeros(remaining_bits);
current_index_ += next_bit_in_word + 1;
return;
}
}
// Skip zeroed bits.
uintptr_t trailing_zeros = base::bits::CountTrailingZeros(current_value_);
current_ += trailing_zeros;
current_value_ >>= trailing_zeros;
// Move {current_index_} down to the beginning of the current word, before
// starting to search for the next non-empty word.
current_index_ = RoundDown(current_index_, kDataBits);
do {
++ptr_;
current_index_ += kDataBits;
if (ptr_ == end_) return;
} while (*ptr_ == 0);
// Get current_value ready for next advance.
current_value_ >>= 1;
uintptr_t trailing_zeros = base::bits::CountTrailingZeros(*ptr_);
current_index_ += trailing_zeros;
}
int operator*() const {
DCHECK(!Done());
return current_;
DCHECK_NE(end_, ptr_);
DCHECK(target_->Contains(current_index_));
return current_index_;
}
bool operator!=(const Iterator& other) const {
// "other" is required to be the end sentinel value, to avoid us needing
// to compare exact "current" values.
DCHECK(other.Done());
bool operator==(const Iterator& other) const {
DCHECK_EQ(target_, other.target_);
return current_index_ != other.current_index_;
DCHECK_EQ(end_, other.end_);
DCHECK_IMPLIES(current_index_ == other.current_index_,
ptr_ == other.ptr_);
return current_index_ == other.current_index_;
}
bool operator!=(const Iterator& other) const { return !(*this == other); }
private:
static constexpr struct StartTag {
} kStartTag = {};
......@@ -66,235 +65,168 @@ class V8_EXPORT_PRIVATE BitVector : public ZoneObject {
} kEndTag = {};
explicit Iterator(const BitVector* target, StartTag)
: target_(target),
current_value_(target->is_inline() ? target->data_.inline_
: target->data_.ptr_[0]),
current_index_(0),
current_(-1) {
++(*this);
:
#ifdef DEBUG
target_(target),
#endif
ptr_(target->data_begin_),
end_(target->data_end_),
current_index_(0) {
DCHECK_LT(ptr_, end_);
while (*ptr_ == 0) {
++ptr_;
current_index_ += kDataBits;
if (ptr_ == end_) return;
}
explicit Iterator(const BitVector* target, EndTag)
: target_(target),
current_value_(0),
current_index_(target->data_length_),
current_(-1) {
DCHECK(Done());
current_index_ += base::bits::CountTrailingZeros(*ptr_);
}
bool Done() const { return current_index_ >= target_->data_length_; }
explicit Iterator(const BitVector* target, EndTag)
:
#ifdef DEBUG
target_(target),
#endif
ptr_(target->data_end_),
end_(target->data_end_),
current_index_(target->data_length() * kDataBits) {
}
#ifdef DEBUG
const BitVector* target_;
uintptr_t current_value_;
#endif
uintptr_t* ptr_;
uintptr_t* end_;
int current_index_;
int current_;
friend class BitVector;
};
static const int kDataLengthForInline = 1;
static const int kDataBits = kBitsPerSystemPointer;
static const int kDataBitShift = kBitsPerSystemPointerLog2;
static const uintptr_t kOne = 1; // This saves some static_casts.
static constexpr int kDataBits = kBitsPerSystemPointer;
static constexpr int kDataBitShift = kBitsPerSystemPointerLog2;
BitVector() = default;
BitVector(int length, Zone* zone)
: length_(length), data_length_(SizeFor(length)) {
BitVector(int length, Zone* zone) : length_(length) {
DCHECK_LE(0, length);
if (!is_inline()) {
data_.ptr_ = zone->NewArray<uintptr_t>(data_length_);
Clear();
int data_length = (length + kDataBits - 1) >> kDataBitShift;
if (data_length > 1) {
data_.ptr_ = zone->NewArray<uintptr_t>(data_length);
std::fill_n(data_.ptr_, data_length, 0);
data_begin_ = data_.ptr_;
data_end_ = data_begin_ + data_length;
}
// Otherwise, clearing is implicit
}
BitVector(const BitVector& other, Zone* zone)
: length_(other.length_),
data_length_(other.data_length_),
data_(other.data_.inline_) {
if (!is_inline()) {
data_.ptr_ = zone->NewArray<uintptr_t>(data_length_);
std::copy_n(other.data_.ptr_, other.data_length_, data_.ptr_);
}
}
static int SizeFor(int length) {
if (length <= kDataBits) {
return kDataLengthForInline;
: length_(other.length_), data_(other.data_.inline_) {
if (!other.is_inline()) {
int data_length = other.data_length();
DCHECK_LT(1, data_length);
data_.ptr_ = zone->NewArray<uintptr_t>(data_length);
data_begin_ = data_.ptr_;
data_end_ = data_begin_ + data_length;
std::copy_n(other.data_begin_, data_length, data_begin_);
}
int data_length = 1 + ((length - 1) / kDataBits);
DCHECK_GT(data_length, kDataLengthForInline);
return data_length;
}
void CopyFrom(const BitVector& other) {
DCHECK_EQ(other.length(), length());
if (is_inline()) {
DCHECK(other.is_inline());
data_.inline_ = other.data_.inline_;
} else {
std::copy_n(other.data_.ptr_, data_length_, data_.ptr_);
}
DCHECK_EQ(is_inline(), other.is_inline());
std::copy_n(other.data_begin_, data_length(), data_begin_);
}
void Resize(int new_length, Zone* zone) {
DCHECK_GT(new_length, length());
int new_data_length = SizeFor(new_length);
if (new_data_length > data_length_) {
DataStorage old_data = data_;
int old_data_length = data_length_;
// Make sure the new data length is large enough to need allocation.
DCHECK_GT(new_data_length, kDataLengthForInline);
data_.ptr_ = zone->NewArray<uintptr_t>(new_data_length);
data_length_ = new_data_length;
int old_data_length = data_length();
DCHECK_LE(1, old_data_length);
int new_data_length = (new_length + kDataBits - 1) >> kDataBitShift;
if (new_data_length > old_data_length) {
uintptr_t* new_data = zone->NewArray<uintptr_t>(new_data_length);
// Copy over the data.
if (old_data_length == kDataLengthForInline) {
data_.ptr_[0] = old_data.inline_;
} else {
std::copy_n(old_data.ptr_, old_data_length, data_.ptr_);
}
std::copy_n(data_begin_, old_data_length, new_data);
// Zero out the rest of the data.
std::fill(data_.ptr_ + old_data_length, data_.ptr_ + data_length_, 0);
std::fill(new_data + old_data_length, new_data + new_data_length, 0);
data_begin_ = new_data;
data_end_ = new_data + new_data_length;
}
length_ = new_length;
}
bool Contains(int i) const {
DCHECK(i >= 0 && i < length());
uintptr_t block = is_inline() ? data_.inline_ : data_.ptr_[i / kDataBits];
return (block & (kOne << (i % kDataBits))) != 0;
return (data_begin_[word(i)] & bit(i)) != 0;
}
void Add(int i) {
DCHECK(i >= 0 && i < length());
if (is_inline()) {
data_.inline_ |= (kOne << i);
} else {
data_.ptr_[i / kDataBits] |= (kOne << (i % kDataBits));
}
data_begin_[word(i)] |= bit(i);
}
void AddAll() {
// TODO(leszeks): This sets bits outside of the length of this bit-vector,
// which is observable if we resize it or copy from it. If this is a
// problem, we should clear the high bits either on add, or on resize/copy.
if (is_inline()) {
data_.inline_ = -1;
} else {
memset(data_.ptr_, -1, sizeof(uintptr_t) * data_length_);
}
memset(data_begin_, -1, sizeof(*data_begin_) * data_length());
}
void Remove(int i) {
DCHECK(i >= 0 && i < length());
if (is_inline()) {
data_.inline_ &= ~(kOne << i);
} else {
data_.ptr_[i / kDataBits] &= ~(kOne << (i % kDataBits));
}
data_begin_[word(i)] &= ~bit(i);
}
void Union(const BitVector& other) {
DCHECK(other.length() == length());
if (is_inline()) {
DCHECK(other.is_inline());
data_.inline_ |= other.data_.inline_;
} else {
for (int i = 0; i < data_length_; i++) {
data_.ptr_[i] |= other.data_.ptr_[i];
}
DCHECK_EQ(other.length(), length());
for (int i = 0; i < data_length(); i++) {
data_begin_[i] |= other.data_begin_[i];
}
}
bool UnionIsChanged(const BitVector& other) {
DCHECK(other.length() == length());
if (is_inline()) {
DCHECK(other.is_inline());
uintptr_t old_data = data_.inline_;
data_.inline_ |= other.data_.inline_;
return data_.inline_ != old_data;
} else {
bool changed = false;
for (int i = 0; i < data_length_; i++) {
uintptr_t old_data = data_.ptr_[i];
data_.ptr_[i] |= other.data_.ptr_[i];
if (data_.ptr_[i] != old_data) changed = true;
for (int i = 0; i < data_length(); i++) {
uintptr_t old_data = data_begin_[i];
data_begin_[i] |= other.data_begin_[i];
if (data_begin_[i] != old_data) changed = true;
}
return changed;
}
}
void Intersect(const BitVector& other) {
DCHECK(other.length() == length());
if (is_inline()) {
DCHECK(other.is_inline());
data_.inline_ &= other.data_.inline_;
} else {
for (int i = 0; i < data_length_; i++) {
data_.ptr_[i] &= other.data_.ptr_[i];
}
for (int i = 0; i < data_length(); i++) {
data_begin_[i] &= other.data_begin_[i];
}
}
bool IntersectIsChanged(const BitVector& other) {
DCHECK(other.length() == length());
if (is_inline()) {
DCHECK(other.is_inline());
uintptr_t old_data = data_.inline_;
data_.inline_ &= other.data_.inline_;
return data_.inline_ != old_data;
} else {
bool changed = false;
for (int i = 0; i < data_length_; i++) {
uintptr_t old_data = data_.ptr_[i];
data_.ptr_[i] &= other.data_.ptr_[i];
if (data_.ptr_[i] != old_data) changed = true;
for (int i = 0; i < data_length(); i++) {
uintptr_t old_data = data_begin_[i];
data_begin_[i] &= other.data_begin_[i];
if (data_begin_[i] != old_data) changed = true;
}
return changed;
}
}
void Subtract(const BitVector& other) {
DCHECK(other.length() == length());
if (is_inline()) {
DCHECK(other.is_inline());
data_.inline_ &= ~other.data_.inline_;
} else {
for (int i = 0; i < data_length_; i++) {
data_.ptr_[i] &= ~other.data_.ptr_[i];
}
for (int i = 0; i < data_length(); i++) {
data_begin_[i] &= ~other.data_begin_[i];
}
}
void Clear() {
if (is_inline()) {
data_.inline_ = 0;
} else {
std::fill_n(data_.ptr_, data_length_, 0);
}
}
void Clear() { std::fill_n(data_begin_, data_length(), 0); }
bool IsEmpty() const {
if (is_inline()) {
return data_.inline_ == 0;
} else {
return std::all_of(data_.ptr_, data_.ptr_ + data_length_,
std::logical_not<uintptr_t>{});
}
return std::all_of(data_begin_, data_end_, std::logical_not<uintptr_t>{});
}
bool Equals(const BitVector& other) const {
DCHECK(other.length() == length());
if (is_inline()) {
DCHECK(other.is_inline());
return data_.inline_ == other.data_.inline_;
} else {
return std::equal(data_.ptr_, data_.ptr_ + data_length_,
other.data_.ptr_);
}
return std::equal(data_begin_, data_end_, other.data_begin_);
}
int Count() const;
......@@ -312,11 +244,28 @@ class V8_EXPORT_PRIVATE BitVector : public ZoneObject {
MOVE_ONLY_NO_DEFAULT_CONSTRUCTOR(BitVector);
private:
union DataStorage {
uintptr_t* ptr_; // valid if >1 machine word is needed
uintptr_t inline_; // valid if <=1 machine word is needed
explicit DataStorage(uintptr_t value) : inline_(value) {}
};
bool is_inline() const { return data_begin_ == &data_.inline_; }
int data_length() const { return static_cast<int>(data_end_ - data_begin_); }
V8_INLINE static int word(int index) {
V8_ASSUME(index >= 0);
return index >> kDataBitShift;
}
V8_INLINE static uintptr_t bit(int index) {
return uintptr_t{1} << (index & (kDataBits - 1));
}
int length_ = 0;
int data_length_ = kDataLengthForInline;
DataStorage data_{0};
bool is_inline() const { return data_length_ == kDataLengthForInline; }
uintptr_t* data_begin_ = &data_.inline_;
uintptr_t* data_end_ = &data_.inline_ + 1;
};
class GrowableBitVector {
......
......@@ -126,5 +126,40 @@ TEST_F(BitVectorTest, Resize) {
EXPECT_TRUE(!v.Contains(243));
}
TEST_F(BitVectorTest, BigBitVectorIterator) {
// Big BitVector with big and small entries.
BitVector v(500, zone());
v.Add(27);
v.Add(300);
v.Add(499);
auto iter = v.begin();
auto end = v.end();
EXPECT_NE(iter, end);
EXPECT_EQ(27, *iter);
++iter;
EXPECT_NE(iter, end);
EXPECT_EQ(300, *iter);
++iter;
EXPECT_NE(iter, end);
EXPECT_EQ(499, *iter);
++iter;
EXPECT_EQ(iter, end);
// Remove small entries, add another big one.
v.Resize(1000, zone());
v.Remove(27);
v.Remove(300);
v.Add(500);
iter = v.begin();
end = v.end();
EXPECT_NE(iter, end);
EXPECT_EQ(499, *iter);
++iter;
EXPECT_NE(iter, end);
EXPECT_EQ(500, *iter);
++iter;
EXPECT_EQ(iter, end);
}
} // namespace internal
} // namespace v8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment