Commit a705e655 authored by Jakob Gruber's avatar Jakob Gruber Committed by V8 LUCI CQ

[regexp] Remove partial property sequence support

The proposal has changed and we'll start on the new implementation
from scratch.

Bug: v8:11935, v8:7467
Change-Id: I29e39a414027d80fd91764ce02a05d7c032a41f7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3178964
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarMathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77016}
parent df91e1ed
......@@ -299,7 +299,6 @@ DEFINE_BOOL(harmony_shipping, true, "enable all shipped harmony features")
// Features that are still work in progress (behind individual flags).
#define HARMONY_INPROGRESS_BASE(V) \
V(harmony_regexp_sequence, "RegExp Unicode sequence properties") \
V(harmony_weak_refs_with_cleanup_some, \
"harmony weak references with FinalizationRegistry.prototype.cleanupSome") \
V(harmony_import_assertions, "harmony import assertions") \
......
......@@ -4368,7 +4368,6 @@ void Genesis::InitializeCallSiteBuiltins() {
#define EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(id) \
void Genesis::InitializeGlobal_##id() {}
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_sequence)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_top_level_await)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_import_assertions)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_private_brand_checks)
......
......@@ -271,7 +271,6 @@ class RegExpParserImpl final {
const ZoneVector<char>& name_1,
const ZoneVector<char>& name_2);
RegExpTree* GetPropertySequence(const ZoneVector<char>& name_1);
RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
base::uc32 ParseOctalLiteral();
......@@ -737,13 +736,6 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
builder->AddCharacterClass(cc);
break;
}
if (p == 'p' && name_2.empty()) {
RegExpTree* sequence = GetPropertySequence(name_1);
if (sequence != nullptr) {
builder->AddAtom(sequence);
break;
}
}
}
return ReportError(RegExpError::kInvalidPropertyName);
} else {
......@@ -1739,72 +1731,6 @@ bool RegExpParserImpl<CharT>::AddPropertyClassRange(
}
}
template <class CharT>
RegExpTree* RegExpParserImpl<CharT>::GetPropertySequence(
const ZoneVector<char>& name_1) {
if (!FLAG_harmony_regexp_sequence) return nullptr;
const char* name = name_1.data();
const base::uc32* sequence_list = nullptr;
RegExpFlags flags = RegExpFlag::kUnicode;
if (NameEquals(name, "Emoji_Flag_Sequence")) {
sequence_list = UnicodePropertySequences::kEmojiFlagSequences;
} else if (NameEquals(name, "Emoji_Tag_Sequence")) {
sequence_list = UnicodePropertySequences::kEmojiTagSequences;
} else if (NameEquals(name, "Emoji_ZWJ_Sequence")) {
sequence_list = UnicodePropertySequences::kEmojiZWJSequences;
}
if (sequence_list != nullptr) {
// TODO(yangguo): this creates huge regexp code. Alternative to this is
// to create a new operator that checks for these sequences at runtime.
RegExpBuilder builder(zone(), flags);
while (true) { // Iterate through list of sequences.
while (*sequence_list != 0) { // Iterate through sequence.
builder.AddUnicodeCharacter(*sequence_list);
sequence_list++;
}
sequence_list++;
if (*sequence_list == 0) break;
builder.NewAlternative();
}
return builder.ToRegExp();
}
if (NameEquals(name, "Emoji_Keycap_Sequence")) {
// https://unicode.org/reports/tr51/#def_emoji_keycap_sequence
// emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
RegExpBuilder builder(zone(), flags);
ZoneList<CharacterRange>* prefix_ranges =
zone()->template New<ZoneList<CharacterRange>>(2, zone());
prefix_ranges->Add(CharacterRange::Range('0', '9'), zone());
prefix_ranges->Add(CharacterRange::Singleton('#'), zone());
prefix_ranges->Add(CharacterRange::Singleton('*'), zone());
builder.AddCharacterClass(
zone()->template New<RegExpCharacterClass>(zone(), prefix_ranges));
builder.AddCharacter(0xFE0F);
builder.AddCharacter(0x20E3);
return builder.ToRegExp();
} else if (NameEquals(name, "Emoji_Modifier_Sequence")) {
// https://unicode.org/reports/tr51/#def_emoji_modifier_sequence
// emoji_modifier_sequence := emoji_modifier_base emoji_modifier
RegExpBuilder builder(zone(), flags);
ZoneList<CharacterRange>* modifier_base_ranges =
zone()->template New<ZoneList<CharacterRange>>(2, zone());
LookupPropertyValueName(UCHAR_EMOJI_MODIFIER_BASE, "Y", false,
modifier_base_ranges, zone());
builder.AddCharacterClass(zone()->template New<RegExpCharacterClass>(
zone(), modifier_base_ranges));
ZoneList<CharacterRange>* modifier_ranges =
zone()->template New<ZoneList<CharacterRange>>(2, zone());
LookupPropertyValueName(UCHAR_EMOJI_MODIFIER, "Y", false, modifier_ranges,
zone());
builder.AddCharacterClass(
zone()->template New<RegExpCharacterClass>(zone(), modifier_ranges));
return builder.ToRegExp();
}
return nullptr;
}
#else // V8_INTL_SUPPORT
template <class CharT>
......@@ -1820,12 +1746,6 @@ bool RegExpParserImpl<CharT>::AddPropertyClassRange(
return false;
}
template <class CharT>
RegExpTree* RegExpParserImpl<CharT>::GetPropertySequence(
const ZoneVector<char>& name) {
return nullptr;
}
#endif // V8_INTL_SUPPORT
template <class CharT>
......
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-sequence
// These tests have been generated by the script at
// https://gist.github.com/mathiasbynens/3b42c99a227521dabfe68d9e63f00f42.
// Do not modify this file directly!
const re = /\p{Emoji_Keycap_Sequence}/u;
assertTrue(re.test('#\uFE0F\u20E3'));
assertTrue(re.test('9\uFE0F\u20E3'));
assertTrue(re.test('0\uFE0F\u20E3'));
assertTrue(re.test('1\uFE0F\u20E3'));
assertTrue(re.test('2\uFE0F\u20E3'));
assertTrue(re.test('3\uFE0F\u20E3'));
assertTrue(re.test('*\uFE0F\u20E3'));
assertTrue(re.test('5\uFE0F\u20E3'));
assertTrue(re.test('6\uFE0F\u20E3'));
assertTrue(re.test('7\uFE0F\u20E3'));
assertTrue(re.test('8\uFE0F\u20E3'));
assertTrue(re.test('4\uFE0F\u20E3'));
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-sequence
// These tests have been generated by the script at
// https://gist.github.com/mathiasbynens/3b42c99a227521dabfe68d9e63f00f42.
// Do not modify this file directly!
const re = /\p{Emoji_Tag_Sequence}/u;
assertTrue(re.test('\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}'));
assertTrue(re.test('\u{1F3F4}\u{E0067}\u{E0062}\u{E0073}\u{E0063}\u{E0074}\u{E007F}'));
assertTrue(re.test('\u{1F3F4}\u{E0067}\u{E0062}\u{E0077}\u{E006C}\u{E0073}\u{E007F}'));
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-sequence
// Normal usage.
assertDoesNotThrow("/\\p{Emoji_Flag_Sequence}/u");
assertTrue(/\p{Emoji_Flag_Sequence}/u.test("\u{1F1E9}\u{1F1EA}"));
assertDoesNotThrow("/\\p{Emoji_Keycap_Sequence}/u");
assertTrue(/\p{Emoji_Keycap_Sequence}/u.test("\u0023\uFE0F\u20E3"));
assertDoesNotThrow("/\\p{Emoji_Keycap_Sequence}/u");
assertFalse(/\p{Emoji_Keycap_Sequence}/u.test("\u0022\uFE0F\u20E3"));
assertDoesNotThrow("/\\p{Emoji_Modifier_Sequence}/u");
assertTrue(/\p{Emoji_Modifier_Sequence}/u.test("\u26F9\u{1F3FF}"));
assertDoesNotThrow("/\\p{Emoji_ZWJ_Sequence}/u");
assertTrue(/\p{Emoji_ZWJ_Sequence}/u.test("\u{1F468}\u{200D}\u{1F467}"));
// Without unicode flag.
assertDoesNotThrow("/\\p{Emoji_Flag_Sequence}/");
assertFalse(/\p{Emoji_Flag_Sequence}/.test("\u{1F1E9}\u{1F1EA}"));
assertTrue(/\p{Emoji_Flag_Sequence}/.test("\\p{Emoji_Flag_Sequence}"));
// Negated and/or inside a character class.
assertThrows("/\\P{Emoji_Flag_Sequence}/u");
assertThrows("/\\P{Emoji_Keycap_Sequence}/u");
assertThrows("/\\P{Emoji_Modifier_Sequence}/u");
assertThrows("/\\P{Emoji_Tag_Sequence}/u");
assertThrows("/\\P{Emoji_ZWJ_Sequence}/u");
assertThrows("/[\\p{Emoji_Flag_Sequence}]/u");
assertThrows("/[\\p{Emoji_Keycap_Sequence}]/u");
assertThrows("/[\\p{Emoji_Modifier_Sequence}]/u");
assertThrows("/[\\p{Emoji_Tag_Sequence}]/u");
assertThrows("/[\\p{Emoji_ZWJ_Sequence}]/u");
assertThrows("/[\\P{Emoji_Flag_Sequence}]/u");
assertThrows("/[\\P{Emoji_Keycap_Sequence}]/u");
assertThrows("/[\\P{Emoji_Modifier_Sequence}]/u");
assertThrows("/[\\P{Emoji_Tag_Sequence}]/u");
assertThrows("/[\\P{Emoji_ZWJ_Sequence}]/u");
assertThrows("/[\\w\\p{Emoji_Flag_Sequence}]/u");
assertThrows("/[\\w\\p{Emoji_Keycap_Sequence}]/u");
assertThrows("/[\\w\\p{Emoji_Modifier_Sequence}]/u");
assertThrows("/[\\w\\p{Emoji_Tag_Sequence}]/u");
assertThrows("/[\\w\\p{Emoji_ZWJ_Sequence}]/u");
assertThrows("/[\\w\\P{Emoji_Flag_Sequence}]/u");
assertThrows("/[\\w\\P{Emoji_Keycap_Sequence}]/u");
assertThrows("/[\\w\\P{Emoji_Modifier_Sequence}]/u");
assertThrows("/[\\w\\P{Emoji_Tag_Sequence}]/u");
assertThrows("/[\\w\\P{Emoji_ZWJ_Sequence}]/u");
// Two regional indicators, but not a country.
assertFalse(/\p{Emoji_Flag_Sequence}/u.test("\u{1F1E6}\u{1F1E6}"));
// ZWJ sequence as in two ZWJ elements joined by a ZWJ, but not in the list.
assertFalse(/\p{Emoji_ZWJ_Sequence}/u.test("\u{1F467}\u{200D}\u{1F468}"));
// More complex regexp
assertEquals(
["country flag: \u{1F1E6}\u{1F1F9}"],
/Country Flag: \p{Emoji_Flag_Sequence}/iu.exec(
"this is an example of a country flag: \u{1F1E6}\u{1F1F9} is Austria"));
assertEquals(
["country flag: \u{1F1E6}\u{1F1F9}", "\u{1F1E6}\u{1F1F9}"],
/Country Flag: (\p{Emoji_Flag_Sequence})/iu.exec(
"this is an example of a country flag: \u{1F1E6}\u{1F1F9} is Austria"));
assertEquals(
["country flag: \u{1F1E6}\u{1F1F9}"],
/Country Flag: ..(?<=\p{Emoji_Flag_Sequence})/iu.exec(
"this is an example of a country flag: \u{1F1E6}\u{1F1F9} is Austria"));
assertEquals(
["flag: \u{1F1E6}\u{1F1F9}", "\u{1F1E6}\u{1F1F9}"],
/Flag: ..(?<=(\p{Emoji_Flag_Sequence})|\p{Emoji_Keycap_Sequence})/iu.exec(
"this is an example of a country flag: \u{1F1E6}\u{1F1F9} is Austria"));
// Partial sequences.
assertFalse(/\p{Emoji_Flag_Sequence}/u.test("\u{1F1E6}_"));
assertFalse(/\p{Emoji_Keycap_Sequence}/u.test("2\uFE0F_"));
assertFalse(/\p{Emoji_Modifier_Sequence}/u.test("\u261D_"));
assertFalse(/\p{Emoji_Tag_Sequence}/u.test("\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}_"));
assertFalse(/\p{Emoji_ZWJ_Sequence}/u.test("\u{1F468}\u200D\u2764\uFE0F\u200D_"));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment