Commit bb24140c authored by Frank Tang, committed by Commit Bot

[Intl] Cutting 43K by removing Unibrow when ICU available

Making 43K of room for landing ICU64.

Size Change (on x64.release)
D8 before 23,683,192
D8 after 23,639,296
Reduce 43,896 bytes

Bugs: v8:8348

Change-Id: I057f7d59e955a2e5e017873e5b3b5daf5b142ae2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1478710
Commit-Queue: Frank Tang <ftang@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60616}
parent 46e944db
......@@ -970,6 +970,7 @@ class Isolate final : private HiddenFactory {
ThreadManager* thread_manager() { return thread_manager_; }
#ifndef V8_INTL_SUPPORT
// Accessor for the isolate's Ecma262UnCanonicalize mapping; returns the
// address of the jsregexp_uncanonicalize_ member.
unibrow::Mapping<unibrow::Ecma262UnCanonicalize>* jsregexp_uncanonicalize() {
return &jsregexp_uncanonicalize_;
}
......@@ -978,14 +979,15 @@ class Isolate final : private HiddenFactory {
return &jsregexp_canonrange_;
}
RuntimeState* runtime_state() { return &runtime_state_; }
Builtins* builtins() { return &builtins_; }
// Accessor for the isolate's Ecma262Canonicalize mapping; returns the address
// of the regexp_macro_assembler_canonicalize_ member.
unibrow::Mapping<unibrow::Ecma262Canonicalize>*
regexp_macro_assembler_canonicalize() {
return &regexp_macro_assembler_canonicalize_;
}
#endif // !V8_INTL_SUPPORT
RuntimeState* runtime_state() { return &runtime_state_; }
Builtins* builtins() { return &builtins_; }
RegExpStack* regexp_stack() { return regexp_stack_; }
......@@ -996,11 +998,6 @@ class Isolate final : private HiddenFactory {
std::vector<int>* regexp_indices() { return &regexp_indices_; }
// Returns the address of regexp_macro_assembler_canonicalize_, i.e. the same
// member that regexp_macro_assembler_canonicalize() hands out.
unibrow::Mapping<unibrow::Ecma262Canonicalize>*
interp_canonicalize_mapping() {
return &regexp_macro_assembler_canonicalize_;
}
Debug* debug() { return debug_; }
bool* is_profiling_address() { return &is_profiling_; }
......@@ -1642,10 +1639,12 @@ class Isolate final : private HiddenFactory {
RuntimeState runtime_state_;
Builtins builtins_;
SetupIsolateDelegate* setup_delegate_ = nullptr;
#ifndef V8_INTL_SUPPORT
unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize_;
unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange_;
unibrow::Mapping<unibrow::Ecma262Canonicalize>
regexp_macro_assembler_canonicalize_;
#endif // !V8_INTL_SUPPORT
RegExpStack* regexp_stack_ = nullptr;
std::vector<int> regexp_indices_;
DateCache* date_cache_ = nullptr;
......
......@@ -21,8 +21,6 @@
namespace v8 {
namespace internal {
using Canonicalize = unibrow::Mapping<unibrow::Ecma262Canonicalize>;
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
int len, Vector<const uc16> subject,
bool unicode) {
......
This diff is collapsed.
......@@ -12,6 +12,7 @@
#ifdef V8_INTL_SUPPORT
#include "unicode/uchar.h"
#include "unicode/unistr.h"
#endif // V8_INTL_SUPPORT
namespace v8 {
......@@ -33,37 +34,17 @@ int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
// A GC might move the calling generated code and invalidate the
// return address on the stack.
DCHECK_EQ(0, byte_length % 2);
#ifdef V8_INTL_SUPPORT
int32_t length = (int32_t)(byte_length >> 1);
icu::UnicodeString uni_str_1(reinterpret_cast<const char16_t*>(byte_offset1),
length);
return uni_str_1.caseCompare(reinterpret_cast<const char16_t*>(byte_offset2),
length, U_FOLD_CASE_DEFAULT) == 0;
#else
uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
size_t length = byte_length >> 1;
#ifdef V8_INTL_SUPPORT
if (isolate == nullptr) {
for (size_t i = 0; i < length; i++) {
uc32 c1 = substring1[i];
uc32 c2 = substring2[i];
if (unibrow::Utf16::IsLeadSurrogate(c1)) {
// Non-BMP characters do not have case-equivalents in the BMP.
// Both have to be non-BMP for them to be able to match.
if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0;
if (i + 1 < length) {
uc16 c1t = substring1[i + 1];
uc16 c2t = substring2[i + 1];
if (unibrow::Utf16::IsTrailSurrogate(c1t) &&
unibrow::Utf16::IsTrailSurrogate(c2t)) {
c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t);
c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t);
i++;
}
}
}
c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
if (c1 != c2) return 0;
}
return 1;
}
#endif // V8_INTL_SUPPORT
DCHECK_NOT_NULL(isolate);
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
isolate->regexp_macro_assembler_canonicalize();
......@@ -83,6 +64,7 @@ int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
}
}
return 1;
#endif // V8_INTL_SUPPORT
}
......
......@@ -11,6 +11,7 @@
namespace unibrow {
#ifndef V8_INTL_SUPPORT
template <class T, int s> bool Predicate<T, s>::get(uchar code_point) {
CacheEntry entry = entries_[code_point & kMask];
if (entry.code_point() == code_point) return entry.value();
......@@ -55,6 +56,7 @@ template <class T, int s> int Mapping<T, s>::CalculateValue(uchar c, uchar n,
return length;
}
}
#endif // !V8_INTL_SUPPORT
// Decodes UTF-8 bytes incrementally, allowing the decoding of bytes as they
// stream in. This **must** be followed by a call to ValueOfIncrementalFinish
......
......@@ -15,8 +15,11 @@
namespace unibrow {
#ifndef V8_INTL_SUPPORT
static const int kStartBit = (1 << 30);
static const int kChunkBits = (1 << 13);
#endif // !V8_INTL_SUPPORT
static const uchar kSentinel = static_cast<uchar>(-1);
/**
......@@ -28,7 +31,7 @@ typedef signed short int16_t; // NOLINT
typedef unsigned short uint16_t; // NOLINT
typedef int int32_t; // NOLINT
#ifndef V8_INTL_SUPPORT
// All access to the character table should go through this function.
template <int D>
static inline uchar TableGet(const int32_t* table, int index) {
......@@ -44,7 +47,6 @@ static inline bool IsStart(int32_t entry) {
return (entry & kStartBit) != 0;
}
#ifndef V8_INTL_SUPPORT
/**
* Look up a character in the Unicode table using a mix of binary and
* interpolation search. For a uniformly distributed array
......@@ -92,6 +94,7 @@ struct MultiCharacterSpecialCase {
uchar chars[kW];
};
#ifndef V8_INTL_SUPPORT
// Look up the mapping for the given character in the specified table,
// which is of the specified length and uses the specified special case
// mapping for multi-char mappings. The next parameter is the character
......@@ -192,6 +195,7 @@ static int LookupMapping(const int32_t* table,
return 0;
}
}
#endif // !V8_INTL_SUPPORT
// This method decodes an UTF-8 value according to RFC 3629 and
// https://encoding.spec.whatwg.org/#utf-8-decoder .
......@@ -1596,7 +1600,6 @@ int ToUppercase::Convert(uchar c,
default: return 0;
}
}
#endif // !V8_INTL_SUPPORT
static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings0[1] = { // NOLINT
{{kSentinel}} }; // NOLINT
......@@ -3072,98 +3075,75 @@ int CanonicalizationRange::Convert(uchar c,
}
}
const uchar UnicodeData::kMaxCodePoint = 0xFFFD;
int UnicodeData::GetByteCount() {
#ifndef V8_INTL_SUPPORT // NOLINT
return kUppercaseTable0Size * sizeof(int32_t) // NOLINT
+ kUppercaseTable1Size * sizeof(int32_t) // NOLINT
+ kUppercaseTable5Size * sizeof(int32_t) // NOLINT
+ kUppercaseTable7Size * sizeof(int32_t) // NOLINT
+ kLetterTable0Size * sizeof(int32_t) // NOLINT
+ kLetterTable1Size * sizeof(int32_t) // NOLINT
+ kLetterTable2Size * sizeof(int32_t) // NOLINT
+ kLetterTable3Size * sizeof(int32_t) // NOLINT
+ kLetterTable4Size * sizeof(int32_t) // NOLINT
+ kLetterTable5Size * sizeof(int32_t) // NOLINT
+ kLetterTable6Size * sizeof(int32_t) // NOLINT
+ kLetterTable7Size * sizeof(int32_t) // NOLINT
+ kID_StartTable0Size * sizeof(int32_t) // NOLINT
+ kID_StartTable1Size * sizeof(int32_t) // NOLINT
+ kID_StartTable2Size * sizeof(int32_t) // NOLINT
+ kID_StartTable3Size * sizeof(int32_t) // NOLINT
+ kID_StartTable4Size * sizeof(int32_t) // NOLINT
+ kID_StartTable5Size * sizeof(int32_t) // NOLINT
+ kID_StartTable6Size * sizeof(int32_t) // NOLINT
+ kID_StartTable7Size * sizeof(int32_t) // NOLINT
+ kID_ContinueTable0Size * sizeof(int32_t) // NOLINT
+ kID_ContinueTable1Size * sizeof(int32_t) // NOLINT
+ kID_ContinueTable5Size * sizeof(int32_t) // NOLINT
+ kID_ContinueTable7Size * sizeof(int32_t) // NOLINT
+ kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT
+ kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT
+ kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT
+
kToLowercaseMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
+
kToLowercaseMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kToLowercaseMultiStrings5Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kToLowercaseMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kToUppercaseMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
+
kToUppercaseMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kToUppercaseMultiStrings5Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kToUppercaseMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
#else
return
#endif // !V8_INTL_SUPPORT
+
kEcma262CanonicalizeMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kEcma262CanonicalizeMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kEcma262CanonicalizeMultiStrings5Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kEcma262CanonicalizeMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kEcma262UnCanonicalizeMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<4>) // NOLINT
+
kEcma262UnCanonicalizeMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
+
kEcma262UnCanonicalizeMultiStrings5Size *
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
+
kEcma262UnCanonicalizeMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
+
kCanonicalizationRangeMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kCanonicalizationRangeMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+
kCanonicalizationRangeMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<1>); // NOLINT
return kUppercaseTable0Size * sizeof(int32_t) // NOLINT
+ kUppercaseTable1Size * sizeof(int32_t) // NOLINT
+ kUppercaseTable5Size * sizeof(int32_t) // NOLINT
+ kUppercaseTable7Size * sizeof(int32_t) // NOLINT
+ kLetterTable0Size * sizeof(int32_t) // NOLINT
+ kLetterTable1Size * sizeof(int32_t) // NOLINT
+ kLetterTable2Size * sizeof(int32_t) // NOLINT
+ kLetterTable3Size * sizeof(int32_t) // NOLINT
+ kLetterTable4Size * sizeof(int32_t) // NOLINT
+ kLetterTable5Size * sizeof(int32_t) // NOLINT
+ kLetterTable6Size * sizeof(int32_t) // NOLINT
+ kLetterTable7Size * sizeof(int32_t) // NOLINT
+ kID_StartTable0Size * sizeof(int32_t) // NOLINT
+ kID_StartTable1Size * sizeof(int32_t) // NOLINT
+ kID_StartTable2Size * sizeof(int32_t) // NOLINT
+ kID_StartTable3Size * sizeof(int32_t) // NOLINT
+ kID_StartTable4Size * sizeof(int32_t) // NOLINT
+ kID_StartTable5Size * sizeof(int32_t) // NOLINT
+ kID_StartTable6Size * sizeof(int32_t) // NOLINT
+ kID_StartTable7Size * sizeof(int32_t) // NOLINT
+ kID_ContinueTable0Size * sizeof(int32_t) // NOLINT
+ kID_ContinueTable1Size * sizeof(int32_t) // NOLINT
+ kID_ContinueTable5Size * sizeof(int32_t) // NOLINT
+ kID_ContinueTable7Size * sizeof(int32_t) // NOLINT
+ kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT
+ kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT
+ kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT
+ kToLowercaseMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
+ kToLowercaseMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kToLowercaseMultiStrings5Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kToLowercaseMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kToUppercaseMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
+ kToUppercaseMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kToUppercaseMultiStrings5Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kToUppercaseMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
+ kEcma262CanonicalizeMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kEcma262CanonicalizeMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kEcma262CanonicalizeMultiStrings5Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kEcma262CanonicalizeMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kEcma262UnCanonicalizeMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<4>) // NOLINT
+ kEcma262UnCanonicalizeMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
+ kEcma262UnCanonicalizeMultiStrings5Size *
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
+ kEcma262UnCanonicalizeMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
+ kCanonicalizationRangeMultiStrings0Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kCanonicalizationRangeMultiStrings1Size *
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
+ kCanonicalizationRangeMultiStrings7Size *
sizeof(MultiCharacterSpecialCase<1>); // NOLINT
}
#endif // !V8_INTL_SUPPORT
} // namespace unibrow
......@@ -25,6 +25,7 @@ typedef unsigned char byte;
*/
const int kMaxMappingSize = 4;
#ifndef V8_INTL_SUPPORT
template <class T, int size = 256>
class Predicate {
public:
......@@ -87,7 +88,6 @@ class Mapping {
CacheEntry entries_[kSize];
};
class UnicodeData {
private:
friend class Test;
......@@ -95,6 +95,7 @@ class UnicodeData {
static const uchar kMaxCodePoint;
};
#endif // !V8_INTL_SUPPORT
class Utf16 {
public:
......@@ -227,7 +228,6 @@ struct ToUppercase {
uchar* result,
bool* allow_caching_ptr);
};
#endif
struct Ecma262Canonicalize {
static const int kMaxWidth = 1;
static int Convert(uchar c,
......@@ -249,6 +249,7 @@ struct CanonicalizationRange {
uchar* result,
bool* allow_caching_ptr);
};
#endif // !V8_INTL_SUPPORT
} // namespace unibrow
......
......@@ -1488,7 +1488,7 @@ TEST(AddInverseToTable) {
CHECK(table.Get(0xFFFF)->Get(0));
}
#ifndef V8_INTL_SUPPORT
static uc32 canonicalize(uc32 c) {
unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, nullptr);
......@@ -1500,7 +1500,6 @@ static uc32 canonicalize(uc32 c) {
}
}
TEST(LatinCanonicalize) {
unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
......@@ -1514,7 +1513,6 @@ TEST(LatinCanonicalize) {
}
for (uc32 c = 128; c < (1 << 21); c++)
CHECK_GE(canonicalize(c), 128);
#ifndef V8_INTL_SUPPORT
unibrow::Mapping<unibrow::ToUppercase> to_upper;
// Canonicalization is only defined for the Basic Multilingual Plane.
for (uc32 c = 0; c < (1 << 16); c++) {
......@@ -1529,10 +1527,8 @@ TEST(LatinCanonicalize) {
u = c;
CHECK_EQ(u, canonicalize(c));
}
#endif
}
static uc32 CanonRangeEnd(uc32 c) {
unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, nullptr);
......@@ -1588,6 +1584,7 @@ TEST(UncanonicalizeEquivalence) {
}
}
#endif
static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
Vector<CharacterRange> expected) {
......@@ -1621,21 +1618,26 @@ TEST(CharacterRangeCaseIndependence) {
CharacterRange::Singleton('A'));
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
CharacterRange::Singleton('Z'));
#ifndef V8_INTL_SUPPORT
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'z'),
CharacterRange::Range('A', 'Z'));
#endif // !V8_INTL_SUPPORT
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('c', 'f'),
CharacterRange::Range('C', 'F'));
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'b'),
CharacterRange::Range('A', 'B'));
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('y', 'z'),
CharacterRange::Range('Y', 'Z'));
#ifndef V8_INTL_SUPPORT
TestSimpleRangeCaseIndependence(isolate,
CharacterRange::Range('a' - 1, 'z' + 1),
CharacterRange::Range('A', 'Z'));
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'Z'),
CharacterRange::Range('a', 'z'));
#endif // !V8_INTL_SUPPORT
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('C', 'F'),
CharacterRange::Range('c', 'f'));
#ifndef V8_INTL_SUPPORT
TestSimpleRangeCaseIndependence(isolate,
CharacterRange::Range('A' - 1, 'Z' + 1),
CharacterRange::Range('a', 'z'));
......@@ -1644,6 +1646,7 @@ TEST(CharacterRangeCaseIndependence) {
// whole block at a time.
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'k'),
CharacterRange::Range('a', 'z'));
#endif // !V8_INTL_SUPPORT
}
......
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Asserts that `text`, compiled as a case-insensitive ('i') pattern, matches
// its own upper-cased form. `msg` is prefixed to the text in the failure
// message.
function testRegExpI(text, msg) {
  const pattern = new RegExp(text, 'i');
  const upperCased = text.toUpperCase();
  assertTrue(pattern.test(upperCased), msg + ': ' + text);
}
// Each entry is [pattern text, label]; the entries are checked in order with
// testRegExpI.
// NOTE(review): U+026A appears to be LATIN LETTER SMALL CAPITAL I rather than
// dotless i (U+0131), so the 'Dotless I' label may be imprecise — confirm.
for (const [text, msg] of [
  ['abc', 'ASCII'],
  ['ABC', 'ASCII'],
  ['rst', 'ASCII'],
  ['RST', 'ASCII'],
  ['αβψδεφ', 'Greek'],
  ['\u1c80\u1c81', 'Historic Cyrillic added in Unicode 9'],
  ['\u026A', 'Dotless I, uppercase form added in Unicode 9'],
  ['ოქტ', 'Georgian Mtavruli added in Unicode 11'],
]) {
  testRegExpI(text, msg);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment