Commit cd5f286d authored by Jakob Kummerow, committed by V8 LUCI CQ

[bigint] Faster parsing when radix is a power of 2

No multiplications needed, just putting bits directly into
the right places.

Bug: v8:11515
Change-Id: I65e5658bb5ed12caec9325f414563526f8edbbf3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3055291
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Maya Lekova <mslekova@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76727}
parent c9704cf7
......@@ -71,6 +71,7 @@ class ProcessorImpl : public Processor {
void FromString(RWDigits Z, FromStringAccumulator* accumulator);
void FromStringClassic(RWDigits Z, FromStringAccumulator* accumulator);
void FromStringLarge(RWDigits Z, FromStringAccumulator* accumulator);
void FromStringBasePowerOfTwo(RWDigits Z, FromStringAccumulator* accumulator);
bool should_terminate() { return status_ == Status::kInterrupted; }
......
......@@ -362,8 +362,12 @@ class FromStringAccumulator {
private:
friend class ProcessorImpl;
ALWAYS_INLINE bool AddPart(digit_t multiplier, digit_t part,
bool is_last = false);
template <class Char>
ALWAYS_INLINE const Char* ParsePowerTwo(const Char* start, const Char* end,
digit_t radix);
ALWAYS_INLINE bool AddPart(digit_t multiplier, digit_t part, bool is_last);
ALWAYS_INLINE bool AddPart(digit_t part, bool is_last);
digit_t stack_parts_[kStackParts];
std::vector<digit_t> heap_parts_;
......@@ -373,6 +377,7 @@ class FromStringAccumulator {
Result result_{Result::kOk};
int stack_parts_used_{0};
bool inline_everything_{false};
uint8_t radix_{0};
};
// The rest of this file is the inlineable implementation of
......@@ -405,6 +410,47 @@ static constexpr uint8_t kCharValue[] = {
25, 26, 27, 28, 29, 30, 31, 32, // 112..119
33, 34, 35, 255, 255, 255, 255, 255, // 120..127 'z' == 122
};
// A space- and time-efficient way to map {2,4,8,16,32} to {1,2,3,4,5}.
// The table is indexed by {radix >> 2}: radixes 2, 4, 8, 16, 32 select
// indices 0, 1, 2, 4, 8 respectively. The entries at indices 3, 5, 6, 7 are
// zero placeholders that no power-of-two radix in that set ever selects.
static constexpr uint8_t kCharBits[] = {1, 2, 3, 0, 4, 0, 0, 0, 5};
template <class Char>
const Char* FromStringAccumulator::ParsePowerTwo(const Char* current,
const Char* end,
digit_t radix) {
radix_ = static_cast<uint8_t>(radix);
const int char_bits = kCharBits[radix >> 2];
int bits_left;
bool done = false;
do {
digit_t part = 0;
bits_left = kDigitBits;
while (true) {
digit_t d; // Numeric value of the current character {c}.
uint32_t c = *current;
if (c > 127 || (d = bigint::kCharValue[c]) >= radix) {
done = true;
break;
}
if (bits_left < char_bits) break;
bits_left -= char_bits;
part = (part << char_bits) | d;
++current;
if (current == end) {
done = true;
break;
}
}
if (!AddPart(part, done)) return current;
} while (!done);
// We use the unused {last_multiplier_} field to
// communicate how many bits are unused in the last part.
last_multiplier_ = bits_left;
return current;
}
template <class Char>
const Char* FromStringAccumulator::Parse(const Char* start, const Char* end,
digit_t radix) {
......@@ -419,12 +465,15 @@ const Char* FromStringAccumulator::Parse(const Char* start, const Char* end,
static constexpr int kInlineThreshold = kStackParts * kDigitBits * 100 / 517;
inline_everything_ = (end - start) <= kInlineThreshold;
#endif
if (!inline_everything_ && (radix & (radix - 1)) == 0) {
return ParsePowerTwo(start, end, radix);
}
bool done = false;
do {
digit_t multiplier = 1;
digit_t part = 0;
while (true) {
digit_t d;
digit_t d; // Numeric value of the current character {c}.
uint32_t c = *current;
if (c > 127 || (d = bigint::kCharValue[c]) >= radix) {
done = true;
......@@ -480,6 +529,10 @@ bool FromStringAccumulator::AddPart(digit_t multiplier, digit_t part,
BIGINT_H_DCHECK(max_multiplier_ == 0 || max_multiplier_ == multiplier);
max_multiplier_ = multiplier;
}
return AddPart(part, is_last);
}
bool FromStringAccumulator::AddPart(digit_t part, bool is_last) {
if (stack_parts_used_ < kStackParts) {
stack_parts_[stack_parts_used_++] = part;
return true;
......
......@@ -212,6 +212,97 @@ void ProcessorImpl::FromStringLarge(RWDigits Z,
}
}
// Combining step for power-of-two radixes; the counterpart of
// {ParsePowerTwo}. In this mode {max_multiplier_} is not recorded, {radix_}
// is, and {last_multiplier_} is repurposed to hold the number of unpopulated
// bits in the last part.
//
// The parts already contain the right bit sequences; all that is left to do
// is to concatenate them:
// - Parts are stored in reversed order: the part with the highest index
//   provides the lowest bits of Z[0].
// - Every part, except possibly the last one, is maximally populated, which
//   means it holds a whole number of characters: the largest multiple of
//   {char_bits} that fits into a digit.
// - Populated bits sit at the low end of a part.
// - {accumulator->last_multiplier_} says how many bits of the last part are
//   unpopulated.
//
// Example, with letters naming individual bits, big-endian bit order within
// a digit (so [00000101] encodes "5"), 'x' marking unpopulated bits,
// kDigitBits == 8, radix == 8, and char_bits == 3:
//
//   parts[0] -> [xxABCDEF][xxGHIJKL][xxMNOPQR][xxxxxSTU] <- parts[3]
//
// must be assembled into:
//
//   Z[0] -> [NOPQRSTU][FGHIJKLM][xxxABCDE] <- Z[2]
//
// Any digits of {Z} beyond the assembled result are set to zero.
void ProcessorImpl::FromStringBasePowerOfTwo(
    RWDigits Z, FromStringAccumulator* accumulator) {
  const int num_parts = accumulator->ResultLength();
  DCHECK(num_parts >= 1);  // NOLINT(readability/check)
  DCHECK(Z.len() >= num_parts);
  // Parts live on the heap once heap storage has been populated, and in the
  // accumulator's inline array otherwise.
  const bool parts_on_heap = accumulator->heap_parts_.size() > 0;
  Digits parts(parts_on_heap ? accumulator->heap_parts_.data()
                             : accumulator->stack_parts_,
               num_parts);
  uint8_t radix = accumulator->radix_;
  DCHECK(radix == 2 || radix == 4 || radix == 8 || radix == 16 || radix == 32);
  const int char_bits = BitLength(radix - 1);
  const int last_part_slack = static_cast<int>(accumulator->last_multiplier_);
  // Number of populated bits in a maximally populated part.
  const int full_part_bits = kDigitBits - (kDigitBits % char_bits);
  int out = 0;             // Next index of {Z} to write.
  int in = num_parts - 1;  // Next index of {parts} to read.

  // Fast path: a completely filled last part implies that all parts are
  // completely filled, so they only need to be copied in reversed order.
  if (last_part_slack == 0) {
    DCHECK(kDigitBits % char_bits == 0);  // NOLINT(readability/check)
    for (; in >= 0; --in, ++out) Z[out] = parts[in];
    while (out < Z.len()) Z[out++] = 0;
    return;
  }

  // General path: stitch the parts together bit by bit.
  // {pending} is the result digit under construction; its low
  // {pending_bits} bits are already final.
  digit_t pending = parts[in--];
  int pending_bits = kDigitBits - last_part_slack;
  while (in >= 0) {
    // {carry} is the most recently read part; its low {carry_bits} bits have
    // not been placed into a result digit yet.
    digit_t carry = 0;
    int carry_bits = 0;
    while (pending_bits < kDigitBits) {
      carry = parts[in--];
      carry_bits = full_part_bits;
      pending |= carry << pending_bits;
      // Number of bits still missing from {pending} before this part.
      const int wanted = kDigitBits - pending_bits;
      if (wanted <= carry_bits) {
        // The part completes the digit; whatever did not fit stays in
        // {carry} for the next digit.
        pending_bits = kDigitBits;
        carry >>= wanted;
        carry_bits -= wanted;
      } else {
        // The part was swallowed entirely and the digit is still incomplete.
        pending_bits += carry_bits;
        carry = 0;
        carry_bits = 0;
        if (in < 0) break;
      }
    }
    Z[out++] = pending;
    pending = carry;
    pending_bits = carry_bits;
  }
  // Flush a trailing, partially filled digit.
  if (pending_bits > 0) Z[out++] = pending;
  while (out < Z.len()) Z[out++] = 0;
}
void ProcessorImpl::FromString(RWDigits Z, FromStringAccumulator* accumulator) {
if (accumulator->inline_everything_) {
int i = 0;
......@@ -221,6 +312,8 @@ void ProcessorImpl::FromString(RWDigits Z, FromStringAccumulator* accumulator) {
for (; i < Z.len(); i++) Z[i] = 0;
} else if (accumulator->stack_parts_used_ == 0) {
for (int i = 0; i < Z.len(); i++) Z[i] = 0;
} else if (IsPowerOfTwo(accumulator->radix_)) {
FromStringBasePowerOfTwo(Z, accumulator);
} else if (accumulator->ResultLength() < kFromStringLargeThreshold) {
FromStringClassic(Z, accumulator);
} else {
......
......@@ -29,13 +29,14 @@ int PrintHelp(char** argv) {
return 1;
}
#define TESTS(V) \
V(kBarrett, "barrett") \
V(kBurnikel, "burnikel") \
V(kFFT, "fft") \
V(kFromString, "fromstring") \
V(kKaratsuba, "karatsuba") \
V(kToom, "toom") \
#define TESTS(V) \
V(kBarrett, "barrett") \
V(kBurnikel, "burnikel") \
V(kFFT, "fft") \
V(kFromString, "fromstring") \
V(kFromStringBase2, "fromstring2") \
V(kKaratsuba, "karatsuba") \
V(kToom, "toom") \
V(kToString, "tostring")
enum Operation { kNoOp, kList, kTest };
......@@ -215,6 +216,10 @@ class Runner {
for (int i = 0; i < runs_; i++) {
TestFromString(&count);
}
} else if (test_ == kFromStringBase2) {
for (int i = 0; i < runs_; i++) {
TestFromStringBaseTwo(&count);
}
} else {
DCHECK(false); // Unreachable.
}
......@@ -413,8 +418,18 @@ class Runner {
constexpr int kMax = kFromStringLargeThreshold * 2;
for (int size = kMin; size < kMax; size++) {
// To keep test execution times low, test one random radix every time.
// Valid range is 2 <= radix <= 36 (inclusive).
int radix = 2 + (rng_.NextUint64() % 35);
// Generally, radixes 2 through 36 (inclusive) are supported; however
// the functions {FromStringLarge} and {FromStringClassic} can't deal
// with the data format that {Parse} creates for power-of-two radixes,
// so we skip power-of-two radixes here (and test them separately below).
// We round up the number of radixes in the list to 32 by padding with
// 10, giving decimal numbers extra test coverage, and making it easy
// to evenly map a random number into the index space.
constexpr uint8_t radixes[] = {3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 33, 34, 35, 36, 10, 10};
int radix_index = (rng_.NextUint64() & 31);
int radix = radixes[radix_index];
int num_chars = std::round(size * kDigitBits / std::log2(radix));
std::unique_ptr<char[]> chars(new char[num_chars]);
GenerateRandomString(chars.get(), num_chars, radix);
......@@ -434,6 +449,38 @@ class Runner {
}
}
// Round-trip test for the power-of-two parsing path: for random inputs of
// 1 to 99 digits and each radix in {2, 4, 8, 16, 32}, prints the number with
// {ToStringImpl}, parses the text again and compares the result against the
// original digits. Increments {*count} once per successful comparison and
// returns early as soon as {error_} is set.
void TestFromStringBaseTwo(int* count) {
  constexpr int kMaxDigits = 1 << 20;  // Any large-enough value will do.
  constexpr int kMin = 1;
  constexpr int kMax = 100;
  for (int size = kMin; size < kMax; size++) {
    ScratchDigits X(size);
    GenerateRandom(X);
    for (int radix = 2; radix <= 32; radix <<= 1) {
      const int capacity = ToStringResultLength(X, radix, false);
      int written = capacity;
      std::unique_ptr<char[]> buffer(new char[capacity]);
      processor()->ToStringImpl(buffer.get(), &written, X, radix, false, true);
      // Whatever part of the allocation the printer left untouched is filled
      // with garbage, so that the parser's handling of it is covered too.
      for (int pad = written; pad < capacity; pad++) {
        buffer[pad] = '?';
      }
      const char* first = buffer.get();
      const char* last = first + capacity;
      FromStringAccumulator accumulator(kMaxDigits);
      accumulator.Parse(first, last, radix);
      ScratchDigits result(accumulator.ResultLength());
      processor()->FromString(result, &accumulator);
      AssertEquals(first, capacity, radix, X, result);
      if (error_) return;
      ++*count;
    }
  }
}
int ParseOptions(int argc, char** argv) {
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--list") == 0) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment