Commit 347ba357 authored by Jakob Kummerow, committed by V8 LUCI CQ

[bigint] FFT-based multiplication

The Schönhage-Strassen method for *very* large inputs.

Bug: v8:11515
Change-Id: Ie8613f54928c9d3f6ff24e3102bc809de9f4496e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3000742
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Maya Lekova <mslekova@chromium.org>
Cr-Commit-Position: refs/heads/master@{#75659}
parent dd8ee88c
......@@ -2510,6 +2510,7 @@ filegroup(
"src/bigint/div-helpers.cc",
"src/bigint/div-helpers.h",
"src/bigint/div-schoolbook.cc",
"src/bigint/mul-fft.cc",
"src/bigint/mul-karatsuba.cc",
"src/bigint/mul-schoolbook.cc",
"src/bigint/mul-toom.cc",
......
......@@ -4964,7 +4964,10 @@ v8_source_set("v8_bigint") {
]
if (v8_advanced_bigint_algorithms) {
sources += [ "src/bigint/mul-toom.cc" ]
sources += [
"src/bigint/mul-fft.cc",
"src/bigint/mul-toom.cc",
]
defines = [ "V8_ADVANCED_BIGINT_ALGORITHMS" ]
}
......
......@@ -35,7 +35,8 @@ void ProcessorImpl::Multiply(RWDigits Z, Digits X, Digits Y) {
return MultiplyKaratsuba(Z, X, Y);
#else
if (Y.len() < kToomThreshold) return MultiplyKaratsuba(Z, X, Y);
return MultiplyToomCook(Z, X, Y);
if (Y.len() < kFftThreshold) return MultiplyToomCook(Z, X, Y);
return MultiplyFFT(Z, X, Y);
#endif
}
......
......@@ -14,6 +14,9 @@ namespace bigint {
constexpr int kKaratsubaThreshold = 34;
constexpr int kToomThreshold = 193;
constexpr int kFftThreshold = 1500;
constexpr int kFftInnerThreshold = 200;
constexpr int kBurnikelThreshold = 57;
class ProcessorImpl : public Processor {
......@@ -42,6 +45,8 @@ class ProcessorImpl : public Processor {
#if V8_ADVANCED_BIGINT_ALGORITHMS
void MultiplyToomCook(RWDigits Z, Digits X, Digits Y);
void Toom3Main(RWDigits Z, Digits X, Digits Y);
void MultiplyFFT(RWDigits Z, Digits X, Digits Y);
#endif // V8_ADVANCED_BIGINT_ALGORITHMS
// {out_length} initially contains the allocated capacity of {out}, and
......
This diff is collapsed.
......@@ -123,11 +123,8 @@ void ProcessorImpl::Toom3Main(RWDigits Z, Digits X, Digits Y) {
// Phase 3a: Pointwise multiplication, steps 0, 1, m1.
Multiply(r_0, X0, Y0);
if (should_terminate()) return;
Multiply(r_1, p_1, q_1);
if (should_terminate()) return;
Multiply(r_m1, p_m1, q_m1);
if (should_terminate()) return;
bool r_m1_sign = p_m1_sign != q_m1_sign;
// Phase 2b: Evaluation, steps m2 and inf.
......@@ -152,14 +149,12 @@ void ProcessorImpl::Toom3Main(RWDigits Z, Digits X, Digits Y) {
MARK_INVALID(p_m1);
MARK_INVALID(q_m1);
Multiply(r_m2, p_m2, q_m2);
if (should_terminate()) return;
bool r_m2_sign = p_m2_sign != q_m2_sign;
RWDigits r_inf(t + r_len, r_len);
MARK_INVALID(p_m2);
MARK_INVALID(q_m2);
Multiply(r_inf, X2, Y2);
if (should_terminate()) return;
// Phase 4: Interpolation.
Digits R0 = r_0;
......@@ -215,7 +210,6 @@ void ProcessorImpl::MultiplyToomCook(RWDigits Z, Digits X, Digits Y) {
if (X.len() > Y.len()) {
ScratchDigits T(2 * k);
for (int i = k; i < X.len(); i += k) {
if (should_terminate()) return;
Digits Xi(X, i, k);
// TODO(jkummerow): would it be a measurable improvement to craft a
// "ToomChunk" method in the style of {KaratsubaChunk}?
......
......@@ -27,8 +27,9 @@ int PrintHelp(char** argv) {
return 1;
}
#define TESTS(V) \
V(kKaratsuba, "karatsuba") V(kBurnikel, "burnikel") V(kToom, "toom")
#define TESTS(V) \
V(kKaratsuba, "karatsuba") \
V(kBurnikel, "burnikel") V(kToom, "toom") V(kFFT, "fft")
enum Operation { kNoOp, kList, kTest };
......@@ -168,6 +169,10 @@ class Runner {
for (int i = 0; i < runs_; i++) {
TestToom(&count);
}
} else if (test_ == kFFT) {
for (int i = 0; i < runs_; i++) {
TestFFT(&count);
}
} else {
DCHECK(false); // Unreachable.
}
......@@ -225,6 +230,40 @@ class Runner {
#endif // V8_ADVANCED_BIGINT_ALGORITHMS
}
// Randomized cross-check of MultiplyFFT against MultiplyToomCook.
//
// One 64-bit random value is sliced into three fields that pick a window of
// operand lengths around kFftThreshold and the stride used to walk it. Every
// (left, right) length pair visited with left >= right is multiplied by both
// algorithms and the two products are compared via AssertEquals. {count} is
// incremented once per pair that was checked; the method returns early as
// soon as {error_} is set.
//
// Compiles to an empty body unless V8_ADVANCED_BIGINT_ALGORITHMS is enabled.
void TestFFT(int* count) {
#if V8_ADVANCED_BIGINT_ALGORITHMS
  // Big multiplications are expensive, so each invocation only samples a
  // handful of sizes; coverage accumulates across repeated (bot) runs.
  uint64_t entropy = rng_.NextUint64();
  // Lowest 10 bits: how far below the threshold the window starts (0..1023).
  const int offset_below = static_cast<int>(entropy & 1023);
  entropy >>= 10;
  // Next 10 bits: how far above the threshold the window ends (0..1023).
  const int offset_above = static_cast<int>(entropy & 1023);
  entropy >>= 10;
  // Next 7 bits plus a floor of 10 give a stride in 10..137. The floor
  // matters: a tiny stride makes the run too slow, and a stride of zero
  // would never terminate.
  const int stride = 10 + static_cast<int>(entropy & 127);

  const int shortest = kFftThreshold - offset_below;
  const int longest = kFftThreshold + offset_above;
  std::cout << "min " << shortest << " max " << longest << " delta " << stride
            << "\n";

  for (int y_len = shortest; y_len <= longest; y_len += stride) {
    for (int x_len = y_len; x_len <= longest; x_len += stride) {
      ScratchDigits A(x_len);
      ScratchDigits B(y_len);
      const int product_len = MultiplyResultLength(A, B);
      ScratchDigits result(product_len);
      ScratchDigits result_toom(product_len);
      GenerateRandom(A);
      GenerateRandom(B);
      // Algorithm under test first, then Toom-Cook as the reference.
      processor()->MultiplyFFT(result, A, B);
      processor()->MultiplyToomCook(result_toom, A, B);
      AssertEquals(A, B, result_toom, result);
      if (error_) return;
      ++*count;
    }
  }
#endif  // V8_ADVANCED_BIGINT_ALGORITHMS
}
void TestBurnikel(int* count) {
// Start small to save test execution time.
constexpr int kMin = kBurnikelThreshold / 2;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment