Commit afa61269 authored by Jakob Kummerow, committed by V8 LUCI CQ

Reland "[bigint] FFT-based multiplication"

The Schönhage-Strassen method for *very* large inputs.

This is a reland of 347ba357,
with added zero-initialization to pacify MSan (spurious report).

Originally:
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3000742
> Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
> Reviewed-by: Maya Lekova <mslekova@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#75659}

Bug: v8:11515
Change-Id: Ieac6e174bde6eb09af0a9a9a49969feabca79e81
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3018081
Reviewed-by: Maya Lekova <mslekova@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#75663}
parent 2a6b2055
...@@ -2510,6 +2510,7 @@ filegroup( ...@@ -2510,6 +2510,7 @@ filegroup(
"src/bigint/div-helpers.cc", "src/bigint/div-helpers.cc",
"src/bigint/div-helpers.h", "src/bigint/div-helpers.h",
"src/bigint/div-schoolbook.cc", "src/bigint/div-schoolbook.cc",
"src/bigint/mul-fft.cc",
"src/bigint/mul-karatsuba.cc", "src/bigint/mul-karatsuba.cc",
"src/bigint/mul-schoolbook.cc", "src/bigint/mul-schoolbook.cc",
"src/bigint/mul-toom.cc", "src/bigint/mul-toom.cc",
......
...@@ -4964,7 +4964,10 @@ v8_source_set("v8_bigint") { ...@@ -4964,7 +4964,10 @@ v8_source_set("v8_bigint") {
] ]
if (v8_advanced_bigint_algorithms) { if (v8_advanced_bigint_algorithms) {
sources += [ "src/bigint/mul-toom.cc" ] sources += [
"src/bigint/mul-fft.cc",
"src/bigint/mul-toom.cc",
]
defines = [ "V8_ADVANCED_BIGINT_ALGORITHMS" ] defines = [ "V8_ADVANCED_BIGINT_ALGORITHMS" ]
} }
......
...@@ -35,7 +35,8 @@ void ProcessorImpl::Multiply(RWDigits Z, Digits X, Digits Y) { ...@@ -35,7 +35,8 @@ void ProcessorImpl::Multiply(RWDigits Z, Digits X, Digits Y) {
return MultiplyKaratsuba(Z, X, Y); return MultiplyKaratsuba(Z, X, Y);
#else #else
if (Y.len() < kToomThreshold) return MultiplyKaratsuba(Z, X, Y); if (Y.len() < kToomThreshold) return MultiplyKaratsuba(Z, X, Y);
return MultiplyToomCook(Z, X, Y); if (Y.len() < kFftThreshold) return MultiplyToomCook(Z, X, Y);
return MultiplyFFT(Z, X, Y);
#endif #endif
} }
......
...@@ -14,6 +14,9 @@ namespace bigint { ...@@ -14,6 +14,9 @@ namespace bigint {
constexpr int kKaratsubaThreshold = 34; constexpr int kKaratsubaThreshold = 34;
constexpr int kToomThreshold = 193; constexpr int kToomThreshold = 193;
constexpr int kFftThreshold = 1500;
constexpr int kFftInnerThreshold = 200;
constexpr int kBurnikelThreshold = 57; constexpr int kBurnikelThreshold = 57;
class ProcessorImpl : public Processor { class ProcessorImpl : public Processor {
...@@ -42,6 +45,8 @@ class ProcessorImpl : public Processor { ...@@ -42,6 +45,8 @@ class ProcessorImpl : public Processor {
#if V8_ADVANCED_BIGINT_ALGORITHMS #if V8_ADVANCED_BIGINT_ALGORITHMS
void MultiplyToomCook(RWDigits Z, Digits X, Digits Y); void MultiplyToomCook(RWDigits Z, Digits X, Digits Y);
void Toom3Main(RWDigits Z, Digits X, Digits Y); void Toom3Main(RWDigits Z, Digits X, Digits Y);
void MultiplyFFT(RWDigits Z, Digits X, Digits Y);
#endif // V8_ADVANCED_BIGINT_ALGORITHMS #endif // V8_ADVANCED_BIGINT_ALGORITHMS
// {out_length} initially contains the allocated capacity of {out}, and // {out_length} initially contains the allocated capacity of {out}, and
......
This diff is collapsed.
...@@ -123,11 +123,8 @@ void ProcessorImpl::Toom3Main(RWDigits Z, Digits X, Digits Y) { ...@@ -123,11 +123,8 @@ void ProcessorImpl::Toom3Main(RWDigits Z, Digits X, Digits Y) {
// Phase 3a: Pointwise multiplication, steps 0, 1, m1. // Phase 3a: Pointwise multiplication, steps 0, 1, m1.
Multiply(r_0, X0, Y0); Multiply(r_0, X0, Y0);
if (should_terminate()) return;
Multiply(r_1, p_1, q_1); Multiply(r_1, p_1, q_1);
if (should_terminate()) return;
Multiply(r_m1, p_m1, q_m1); Multiply(r_m1, p_m1, q_m1);
if (should_terminate()) return;
bool r_m1_sign = p_m1_sign != q_m1_sign; bool r_m1_sign = p_m1_sign != q_m1_sign;
// Phase 2b: Evaluation, steps m2 and inf. // Phase 2b: Evaluation, steps m2 and inf.
...@@ -152,14 +149,12 @@ void ProcessorImpl::Toom3Main(RWDigits Z, Digits X, Digits Y) { ...@@ -152,14 +149,12 @@ void ProcessorImpl::Toom3Main(RWDigits Z, Digits X, Digits Y) {
MARK_INVALID(p_m1); MARK_INVALID(p_m1);
MARK_INVALID(q_m1); MARK_INVALID(q_m1);
Multiply(r_m2, p_m2, q_m2); Multiply(r_m2, p_m2, q_m2);
if (should_terminate()) return;
bool r_m2_sign = p_m2_sign != q_m2_sign; bool r_m2_sign = p_m2_sign != q_m2_sign;
RWDigits r_inf(t + r_len, r_len); RWDigits r_inf(t + r_len, r_len);
MARK_INVALID(p_m2); MARK_INVALID(p_m2);
MARK_INVALID(q_m2); MARK_INVALID(q_m2);
Multiply(r_inf, X2, Y2); Multiply(r_inf, X2, Y2);
if (should_terminate()) return;
// Phase 4: Interpolation. // Phase 4: Interpolation.
Digits R0 = r_0; Digits R0 = r_0;
...@@ -215,7 +210,6 @@ void ProcessorImpl::MultiplyToomCook(RWDigits Z, Digits X, Digits Y) { ...@@ -215,7 +210,6 @@ void ProcessorImpl::MultiplyToomCook(RWDigits Z, Digits X, Digits Y) {
if (X.len() > Y.len()) { if (X.len() > Y.len()) {
ScratchDigits T(2 * k); ScratchDigits T(2 * k);
for (int i = k; i < X.len(); i += k) { for (int i = k; i < X.len(); i += k) {
if (should_terminate()) return;
Digits Xi(X, i, k); Digits Xi(X, i, k);
// TODO(jkummerow): would it be a measurable improvement to craft a // TODO(jkummerow): would it be a measurable improvement to craft a
// "ToomChunk" method in the style of {KaratsubaChunk}? // "ToomChunk" method in the style of {KaratsubaChunk}?
......
...@@ -28,7 +28,8 @@ int PrintHelp(char** argv) { ...@@ -28,7 +28,8 @@ int PrintHelp(char** argv) {
} }
#define TESTS(V) \ #define TESTS(V) \
V(kKaratsuba, "karatsuba") V(kBurnikel, "burnikel") V(kToom, "toom") V(kKaratsuba, "karatsuba") \
V(kBurnikel, "burnikel") V(kToom, "toom") V(kFFT, "fft")
enum Operation { kNoOp, kList, kTest }; enum Operation { kNoOp, kList, kTest };
...@@ -168,6 +169,10 @@ class Runner { ...@@ -168,6 +169,10 @@ class Runner {
for (int i = 0; i < runs_; i++) { for (int i = 0; i < runs_; i++) {
TestToom(&count); TestToom(&count);
} }
} else if (test_ == kFFT) {
for (int i = 0; i < runs_; i++) {
TestFFT(&count);
}
} else { } else {
DCHECK(false); // Unreachable. DCHECK(false); // Unreachable.
} }
...@@ -225,6 +230,40 @@ class Runner { ...@@ -225,6 +230,40 @@ class Runner {
#endif // V8_ADVANCED_BIGINT_ALGORITHMS #endif // V8_ADVANCED_BIGINT_ALGORITHMS
} }
// Randomized cross-check of MultiplyFFT against MultiplyToomCook.
//
// Large multiplications are slow, so rather than sweeping every size this
// picks a random window around kFftThreshold and a random stride on each
// call; repeated runs (e.g. on continuously running build bots) accumulate
// coverage over time. {count} is incremented once per operand pair that was
// multiplied and compared. Without V8_ADVANCED_BIGINT_ALGORITHMS the body is
// compiled out and this function does nothing.
void TestFFT(int* count) {
#if V8_ADVANCED_BIGINT_ALGORITHMS
  // A single 64-bit random value supplies all three parameters:
  // bits 0..9 -> distance of the lower bound below kFftThreshold,
  // bits 10..19 -> distance of the upper bound above it,
  // bits 20..26 -> stride on top of a fixed minimum of 10.
  uint64_t entropy = rng_.NextUint64();
  const int lower = kFftThreshold - static_cast<int>(entropy & 1023);
  entropy >>= 10;
  const int upper = kFftThreshold + static_cast<int>(entropy & 1023);
  entropy >>= 10;
  // The stride must never be tiny: a small value makes the run too slow, and
  // zero would make the loops below spin forever. Hence the floor of 10.
  const int step = 10 + static_cast<int>(entropy & 127);
  std::cout << "min " << lower << " max " << upper << " delta " << step << "\n";

  for (int y_len = lower; y_len <= upper; y_len += step) {
    // The left operand is always at least as long as the right one.
    for (int x_len = y_len; x_len <= upper; x_len += step) {
      ScratchDigits A(x_len);
      ScratchDigits B(y_len);
      const int product_len = MultiplyResultLength(A, B);
      ScratchDigits result(product_len);
      ScratchDigits result_toom(product_len);
      GenerateRandom(A);
      GenerateRandom(B);
      // Compute the product with FFT first, then with Toom-Cook, which
      // serves as the reference implementation.
      processor()->MultiplyFFT(result, A, B);
      processor()->MultiplyToomCook(result_toom, A, B);
      AssertEquals(A, B, result_toom, result);
      // Stop as soon as an error has been flagged (presumably by
      // AssertEquals on a mismatch -- confirm against its definition).
      if (error_) return;
      ++*count;
    }
  }
#endif  // V8_ADVANCED_BIGINT_ALGORITHMS
}
void TestBurnikel(int* count) { void TestBurnikel(int* count) {
// Start small to save test execution time. // Start small to save test execution time.
constexpr int kMin = kBurnikelThreshold / 2; constexpr int kMin = kBurnikelThreshold / 2;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment