Commit 485c15c1 authored by Jakob Kummerow, committed by V8 LUCI CQ

[bigint] Barrett-Newton division

Dividing by first computing a multiplicative inverse is faster than
Burnikel-Ziegler division for very large inputs.

Bug: v8:11515
Change-Id: Ice45690c3fa4eef7102d418cdd3d82a942a076c5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3015573
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Maya Lekova <mslekova@chromium.org>
Cr-Commit-Position: refs/heads/master@{#75743}
parent e1f76d4b
......@@ -2673,6 +2673,7 @@ filegroup(
"src/bigint/bigint-internal.h",
"src/bigint/bigint.h",
"src/bigint/digit-arithmetic.h",
"src/bigint/div-barrett.cc",
"src/bigint/div-burnikel.cc",
"src/bigint/div-helpers.cc",
"src/bigint/div-helpers.h",
......
......@@ -4969,6 +4969,7 @@ v8_source_set("v8_bigint") {
if (v8_advanced_bigint_algorithms) {
sources += [
"src/bigint/div-barrett.cc",
"src/bigint/mul-fft.cc",
"src/bigint/mul-toom.cc",
]
......
......@@ -58,7 +58,16 @@ void ProcessorImpl::Divide(RWDigits Q, Digits A, Digits B) {
if (B.len() < kBurnikelThreshold) {
return DivideSchoolbook(Q, RWDigits(nullptr, 0), A, B);
}
#if !V8_ADVANCED_BIGINT_ALGORITHMS
return DivideBurnikelZiegler(Q, RWDigits(nullptr, 0), A, B);
#else
if (B.len() < kBarrettThreshold || A.len() == B.len()) {
DivideBurnikelZiegler(Q, RWDigits(nullptr, 0), A, B);
} else {
ScratchDigits R(B.len());
DivideBarrett(Q, R, A, B);
}
#endif
}
void ProcessorImpl::Modulo(RWDigits R, Digits A, Digits B) {
......@@ -84,7 +93,15 @@ void ProcessorImpl::Modulo(RWDigits R, Digits A, Digits B) {
}
int q_len = DivideResultLength(A, B);
ScratchDigits Q(q_len);
#if !V8_ADVANCED_BIGINT_ALGORITHMS
return DivideBurnikelZiegler(Q, R, A, B);
#else
if (B.len() < kBarrettThreshold || A.len() == B.len()) {
DivideBurnikelZiegler(Q, R, A, B);
} else {
DivideBarrett(Q, R, A, B);
}
#endif
}
Status Processor::Multiply(RWDigits Z, Digits X, Digits Y) {
......
......@@ -18,6 +18,8 @@ constexpr int kFftThreshold = 1500;
constexpr int kFftInnerThreshold = 200;
constexpr int kBurnikelThreshold = 57;
constexpr int kNewtonInversionThreshold = 50;
// kBarrettThreshold is defined in bigint.h.
class ProcessorImpl : public Processor {
public:
......@@ -47,6 +49,14 @@ class ProcessorImpl : public Processor {
void Toom3Main(RWDigits Z, Digits X, Digits Y);
void MultiplyFFT(RWDigits Z, Digits X, Digits Y);
void DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B);
void DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B, Digits I,
RWDigits scratch);
void Invert(RWDigits Z, Digits V, RWDigits scratch);
void InvertBasecase(RWDigits Z, Digits V, RWDigits scratch);
void InvertNewton(RWDigits Z, Digits V, RWDigits scratch);
#endif // V8_ADVANCED_BIGINT_ALGORITHMS
// {out_length} initially contains the allocated capacity of {out}, and
......
......@@ -274,8 +274,15 @@ inline int SubtractSignedResultLength(int x_length, int y_length,
// Returns the sum of the lengths of {X} and {Y}.
inline int MultiplyResultLength(Digits X, Digits Y) {
  const int x_digits = X.len();
  const int y_digits = Y.len();
  return x_digits + y_digits;
}
// Divisor length (in digits) below which division does not use the Barrett
// algorithm. {DivideResultLength} also compares the divisor length against
// this value.
constexpr int kBarrettThreshold = 13310;
// Returns the number of digits to reserve for the quotient of {A} / {B}:
// A.len() - B.len() + 1, plus one more digit when advanced algorithms are
// enabled and {B} has at least kBarrettThreshold digits.
inline int DivideResultLength(Digits A, Digits B) {
#if V8_ADVANCED_BIGINT_ALGORITHMS
  // The Barrett division algorithm needs one extra digit for temporary use.
  const int barrett_extra_scratch = B.len() >= kBarrettThreshold ? 1 : 0;
#else
  constexpr int barrett_extra_scratch = 0;
#endif
  return A.len() - B.len() + 1 + barrett_extra_scratch;
}
// Returns the length of {B}.
inline int ModuloResultLength(Digits B) {
  const int divisor_digits = B.len();
  return divisor_digits;
}
......
This diff is collapsed.
......@@ -201,5 +201,7 @@ void ProcessorImpl::KaratsubaMain(RWDigits Z, Digits X, Digits Y,
USE(overflow);
}
#undef MAYBE_TERMINATE
} // namespace bigint
} // namespace v8
......@@ -45,6 +45,9 @@ digit_t AddAndReturnCarry(RWDigits Z, Digits X, Digits Y);
digit_t SubtractAndReturnBorrow(RWDigits Z, Digits X, Digits Y);
// Returns true when {X} is empty or X.msd() is nonzero.
inline bool IsDigitNormalized(Digits X) {
  if (X.len() == 0) return true;
  return X.msd() != 0;
}
// Returns true when shifting X.msd() right by (kDigitBits - 1) leaves exactly
// 1 (presumably: the top bit of the most significant digit is set).
inline bool IsBitNormalized(Digits X) {
  const auto shifted_msd = X.msd() >> (kDigitBits - 1);
  return shifted_msd == 1;
}
inline bool GreaterThanOrEqual(Digits A, Digits B) {
return Compare(A, B) >= 0;
......
......@@ -28,8 +28,11 @@ int PrintHelp(char** argv) {
}
// X-macro listing each available test once as V(identifier, "name"), in
// alphabetical order. Every line except the last must end in a backslash so
// that all entries stay inside the macro.
#define TESTS(V)             \
  V(kBarrett, "barrett")     \
  V(kBurnikel, "burnikel")   \
  V(kFFT, "fft")             \
  V(kKaratsuba, "karatsuba") \
  V(kToom, "toom")
enum Operation { kNoOp, kList, kTest };
......@@ -157,22 +160,26 @@ class Runner {
int RunTest() {
int count = 0;
if (test_ == kKaratsuba) {
if (test_ == kBarrett) {
for (int i = 0; i < runs_; i++) {
TestKaratsuba(&count);
TestBarrett(&count);
}
} else if (test_ == kBurnikel) {
for (int i = 0; i < runs_; i++) {
TestBurnikel(&count);
}
} else if (test_ == kToom) {
for (int i = 0; i < runs_; i++) {
TestToom(&count);
}
} else if (test_ == kFFT) {
for (int i = 0; i < runs_; i++) {
TestFFT(&count);
}
} else if (test_ == kKaratsuba) {
for (int i = 0; i < runs_; i++) {
TestKaratsuba(&count);
}
} else if (test_ == kToom) {
for (int i = 0; i < runs_; i++) {
TestToom(&count);
}
} else {
DCHECK(false); // Unreachable.
}
......@@ -291,6 +298,56 @@ class Runner {
}
}
#if V8_ADVANCED_BIGINT_ALGORITHMS
void TestBarrett_Internal(int left_size, int right_size) {
ScratchDigits A(left_size);
ScratchDigits B(right_size);
GenerateRandom(A);
GenerateRandom(B);
int quotient_len = DivideResultLength(A, B);
// {DivideResultLength} doesn't expect to be called for sizes below
// {kBarrettThreshold} (which we do here to save time), so we have to
// manually adjust the allocated result length.
if (B.len() < kBarrettThreshold) quotient_len++;
int remainder_len = right_size;
ScratchDigits quotient(quotient_len);
ScratchDigits quotient_burnikel(quotient_len);
ScratchDigits remainder(remainder_len);
ScratchDigits remainder_burnikel(remainder_len);
processor()->DivideBarrett(quotient, remainder, A, B);
processor()->DivideBurnikelZiegler(quotient_burnikel, remainder_burnikel, A,
B);
AssertEquals(A, B, quotient_burnikel, quotient);
AssertEquals(A, B, remainder_burnikel, remainder);
}
// Runs TestBarrett_Internal over a grid of small sizes plus one large
// randomly sized case, incrementing {*count} after each case and returning
// early as soon as {error_} is set.
void TestBarrett(int* count) {
  // The sweep is centered on kBurnikelThreshold (not kBarrettThreshold) to
  // keep test execution time down.
  constexpr int kSmallest = kBurnikelThreshold / 2;
  constexpr int kLargest = 2 * kBurnikelThreshold;
  // {DivideBarrett(A, B)} requires that A.len > B.len, hence the "+ 1".
  for (int divisor_len = kSmallest; divisor_len <= kLargest; divisor_len++) {
    int dividend_len = divisor_len + 1;
    while (dividend_len <= kLargest) {
      TestBarrett_Internal(dividend_len, divisor_len);
      if (error_) return;
      ++*count;
      dividend_len++;
    }
  }

  // One additional large case: 10 random bits pick the divisor's excess over
  // kBarrettThreshold, the next 14 bits pick the dividend's excess over the
  // divisor.
  uint64_t bits = rng_.NextUint64();
  const int big_divisor_len =
      kBarrettThreshold + static_cast<int>(bits & 0x3FF);
  bits >>= 10;
  const int big_dividend_len =
      big_divisor_len + 1 + static_cast<int>(bits & 0x3FFF);
  bits >>= 14;
  TestBarrett_Internal(big_dividend_len, big_divisor_len);
  if (error_) return;
  ++*count;
}
#else
// No-op stand-in used when V8_ADVANCED_BIGINT_ALGORITHMS is off; {count} is
// left untouched.
void TestBarrett(int* count) {}
#endif // V8_ADVANCED_BIGINT_ALGORITHMS
int ParseOptions(int argc, char** argv) {
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--list") == 0) {
......@@ -325,6 +382,9 @@ class Runner {
}
private:
// TODO(jkummerow): Also generate "non-random-looking" inputs, i.e. long
// strings of zeros and ones in the binary representation, such as
// ((1 << random) ± 1).
void GenerateRandom(RWDigits Z) {
if (Z.len() == 0) return;
if (sizeof(digit_t) == 8) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment