Commit 6fbb8bc8 authored by Jakob Gruber's avatar Jakob Gruber Committed by V8 LUCI CQ

[intl] A fast path for Intl::CompareStrings

Certain collators and subject strings may take this new fast
path without calling into the (slow) ICU comparison functions.

This CL can be roughly split into three topics:
1. The fast path check, precomputed and implemented as a whitelist
   on the current locale string.
2. The actual fast path, which checks subject string eligibility
   and performs L1 and L3 collation weight comparisons all in one pass.
3. Resuming from an aborted fast-path into the generic path.

A longer overview is available at
https://docs.google.com/document/d/1oyDwjYn2JyHsx2YnJJKhjX0WMNQXb8ao86-DRzqiYNg/edit?usp=sharing

JetStream2/cdjs scores improve by roughly 40%.

Bug: v8:12196
Change-Id: I5e1bbd731a36c361af9667f9104d6fa15c42e117
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3149463Reviewed-by: 's avatarToon Verwaest <verwaest@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77284}
parent c07776ad
This diff is collapsed.
......@@ -96,10 +96,17 @@ class Intl {
Isolate* isolate, Handle<String> s1, Handle<String> s2,
Handle<Object> locales, Handle<Object> options, const char* method_name);
V8_WARN_UNUSED_RESULT static int CompareStrings(Isolate* isolate,
const icu::Collator& collator,
Handle<String> s1,
Handle<String> s2);
enum class CompareStringsOptions {
kNone,
kTryFastPath,
};
V8_EXPORT_PRIVATE static CompareStringsOptions CompareStringsOptionsFor(
Isolate* isolate, Handle<Object> locales, Handle<Object> options);
V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static int CompareStrings(
Isolate* isolate, const icu::Collator& collator, Handle<String> s1,
Handle<String> s2,
CompareStringsOptions compare_strings_options =
CompareStringsOptions::kNone);
// ecma402/#sup-properties-of-the-number-prototype-object
V8_WARN_UNUSED_RESULT static MaybeHandle<String> NumberToLocaleString(
......@@ -119,7 +126,7 @@ class Intl {
int mnfd_default, int mxfd_default,
bool notation_is_compact);
// Helper funciton to convert a UnicodeString to a Handle<String>
// Helper function to convert a UnicodeString to a Handle<String>
V8_WARN_UNUSED_RESULT static MaybeHandle<String> ToString(
Isolate* isolate, const icu::UnicodeString& string);
......@@ -261,7 +268,8 @@ class Intl {
// Convert a Handle<String> to icu::UnicodeString
static icu::UnicodeString ToICUUnicodeString(Isolate* isolate,
Handle<String> string);
Handle<String> string,
int offset = 0);
static const uint8_t* ToLatin1LowerTable();
......
......@@ -34,7 +34,7 @@ namespace internal {
class JSCollator : public TorqueGeneratedJSCollator<JSCollator, JSObject> {
public:
// ecma402/#sec-initializecollator
V8_WARN_UNUSED_RESULT static MaybeHandle<JSCollator> New(
V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<JSCollator> New(
Isolate* isolate, Handle<Map> map, Handle<Object> locales,
Handle<Object> options, const char* service);
......
......@@ -241,7 +241,7 @@ class JSFunction
// Creates a map that matches the constructor's initial map, but with
// [[prototype]] being new.target.prototype. Because new.target can be a
// JSProxy, this can call back into JavaScript.
static V8_WARN_UNUSED_RESULT MaybeHandle<Map> GetDerivedMap(
V8_EXPORT_PRIVATE static V8_WARN_UNUSED_RESULT MaybeHandle<Map> GetDerivedMap(
Isolate* isolate, Handle<JSFunction> constructor,
Handle<JSReceiver> new_target);
......
......@@ -142,6 +142,8 @@ class String : public TorqueGeneratedString<String, Name> {
return onebyte_start == other.onebyte_start;
}
int length() const { return length_; }
private:
enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
......
......@@ -136,6 +136,8 @@
'test-strings/Traverse': [PASS, HEAVY],
'test-swiss-name-dictionary-csa/DeleteAtBoundaries': [PASS, HEAVY],
'test-swiss-name-dictionary-csa/SameH2': [PASS, HEAVY],
'test-intl/StringLocaleCompareFastPath': [['mode != release', SKIP], SLOW, NO_VARIANTS],
}], # ALWAYS
##############################################################################
......
......@@ -4,8 +4,9 @@
#ifdef V8_INTL_SUPPORT
#include "src/objects/intl-objects.h"
#include "src/objects/js-break-iterator.h"
#include "src/objects/js-collator.h"
#include "src/objects/js-collator-inl.h"
#include "src/objects/js-date-time-format.h"
#include "src/objects/js-list-format.h"
#include "src/objects/js-number-format.h"
......@@ -16,6 +17,7 @@
#include "src/objects/objects-inl.h"
#include "src/objects/option-utils.h"
#include "test/cctest/cctest.h"
#include "unicode/coll.h"
namespace v8 {
namespace internal {
......@@ -248,6 +250,64 @@ TEST(GetAvailableLocales) {
CHECK(!locales.count("abcdefg"));
}
// Tests that the LocaleCompare fast path and generic path return the same
// comparison results for all ASCII strings.
TEST(StringLocaleCompareFastPath) {
LocalContext env;
Isolate* isolate = CcTest::i_isolate();
HandleScope handle_scope(isolate);
// We compare all single-char strings of printable ASCII characters.
std::vector<Handle<String>> ascii_strings;
for (int c = 0; c <= 0x7F; c++) {
if (!std::isprint(c)) continue;
ascii_strings.push_back(
isolate->factory()->LookupSingleCharacterStringFromCode(c));
}
Handle<JSFunction> collator_constructor = Handle<JSFunction>(
JSFunction::cast(
isolate->context().native_context().intl_collator_function()),
isolate);
Handle<Map> constructor_map =
JSFunction::GetDerivedMap(isolate, collator_constructor,
collator_constructor)
.ToHandleChecked();
Handle<Object> options(ReadOnlyRoots(isolate).undefined_value(), isolate);
static const char* const kMethodName = "StringLocaleCompareFastPath";
// For all fast locales, exhaustively compare within the printable ASCII
// range.
const std::set<std::string>& locales = JSCollator::GetAvailableLocales();
for (const std::string& locale : locales) {
Handle<String> locale_string =
isolate->factory()->NewStringFromAsciiChecked(locale.c_str());
if (Intl::CompareStringsOptionsFor(isolate, locale_string, options) !=
Intl::CompareStringsOptions::kTryFastPath) {
continue;
}
Handle<JSCollator> collator =
JSCollator::New(isolate, constructor_map, locale_string, options,
kMethodName)
.ToHandleChecked();
for (size_t i = 0; i < ascii_strings.size(); i++) {
Handle<String> lhs = ascii_strings[i];
for (size_t j = i + 1; j < ascii_strings.size(); j++) {
Handle<String> rhs = ascii_strings[j];
CHECK_EQ(
Intl::CompareStrings(isolate, *collator->icu_collator().raw(), lhs,
rhs, Intl::CompareStringsOptions::kNone),
Intl::CompareStrings(isolate, *collator->icu_collator().raw(), lhs,
rhs,
Intl::CompareStringsOptions::kTryFastPath));
}
}
}
}
} // namespace internal
} // namespace v8
......
......@@ -26,22 +26,37 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
[
################################################################################
[ALWAYS, {
# TODO(jochen): The following test is flaky.
'overrides/caching': [PASS, FAIL],
}], # ALWAYS
################################################################################
['mode == debug', {
# Skip slow tests in debug mode.
'general/empty-handle': [SKIP],
}], # mode == debug
##############################################################################
['no_i18n', {
'string-localecompare': [SKIP],
}], # no_i18n
################################################################################
['gc_stress', {
# Push limit of stack, too flaky with machine with more memory.
'regress-1130489': [SKIP],
}], # gc_stress
################################################################################
['variant == no_wasm_traps', {
'*': [SKIP],
}], # variant == no_wasm_traps
################################################################################
['system == windows', {
# noi18n cannot turn on ICU backend for Date
# noi18n cannot turn on ICU backend for Date.
'relative-time-format/default-locale-fr-CA': [SKIP],
'relative-time-format/default-locale-pt-BR': [SKIP],
......@@ -52,6 +67,7 @@
'regress-7770': [SKIP],
}], # system == windows'
################################################################################
['system == android', {
# Android's ICU data file does not have the Chinese/Japanese dictionary
# required for the test to pass.
......
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Equal prefix.
assertTrue("asdf".localeCompare("asdf") == 0);
assertTrue("asd".localeCompare("asdf") < 0);
assertTrue("asdff".localeCompare("asdf") > 0);
// Chars differ.
assertTrue("asdf".localeCompare("asdq") < 0);
assertTrue("asdq".localeCompare("asdf") > 0);
// Interesting locales.
assertEquals('ö'.localeCompare('oe', 'de'), -1);
assertEquals('Ö'.localeCompare('oe', 'de'), -1);
assertEquals('ö'.localeCompare('o', 'de-u-co-phonebk'), 1);
assertEquals('ö'.localeCompare('p', 'de-u-co-phonebk'), -1);
assertEquals('Ö'.localeCompare('o', 'de-u-co-phonebk'), 1);
assertEquals('Ö'.localeCompare('p', 'de-u-co-phonebk'), -1);
assertEquals('ö'.localeCompare('o\u0308', 'de-u-co-phonebk'), 0);
assertEquals('ö'.localeCompare('O\u0308', 'de-u-co-phonebk'), -1);
assertEquals('Ö'.localeCompare('o\u0308', 'de-u-co-phonebk'), 1);
assertEquals('Ö'.localeCompare('O\u0308', 'de-u-co-phonebk'), 0);
assertEquals('ch'.localeCompare('ca', 'cs-CZ'), 1);
assertEquals('AA'.localeCompare('A-A', 'th'), 0);
// Attempt to hit different cases of the localeCompare fast path.
assertEquals('aAaaaö'.localeCompare('aaaaaö', 'en-US'), 1);
assertEquals('aaaaaöA'.localeCompare('aaaaaöa', 'en-US'), 1);
assertEquals('ab\u0308'.localeCompare('aa\u0308', 'en-US'), 1);
assertEquals('aA'.localeCompare('aaa', 'en-US'), -1);
assertEquals('aa'.localeCompare('aAa', 'en-US'), -1);
assertEquals('aA'.localeCompare('aa', 'en-US'), 1);
assertEquals('aa'.localeCompare('aA', 'en-US'), -1);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment