Commit 5b42e3f3 authored by Jakob Gruber, committed by Commit Bot

[regexp] Assign proper flags to TextNode

This fixes a case in which we forgot to assign flags to TextNodes
created through

AddBmpCharacters
AddNonBmpSurrogatePairs
AddLoneLeadSurrogates
AddLoneTrailSurrogates

functions. If these initially had a flag (e.g. case-insensitive 'i')
set, that information was lost. This bug resulted in missing case
folding in no_i18n builds (perhaps other things as well that just
aren't covered by our test suite).

Cq-Include-Trybots: luci.v8.try:v8_linux_noi18n_rel_ng
Bug: v8:10131,v8:10120
Change-Id: Icef4f0dbd47971a538e07bab2f1067c383fd59c6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2423718
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70106}
parent 5d5b728b
......@@ -199,25 +199,24 @@ ZoneList<CharacterRange>* ToCanonicalZoneList(
}
void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
RegExpNode* on_success, UnicodeRangeSplitter* splitter,
JSRegExp::Flags flags) {
ZoneList<CharacterRange>* bmp =
ToCanonicalZoneList(splitter->bmp(), compiler->zone());
if (bmp == nullptr) return;
JSRegExp::Flags default_flags = JSRegExp::Flags();
result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
compiler->zone(), bmp, compiler->read_backward(), on_success,
default_flags)));
compiler->zone(), bmp, compiler->read_backward(), on_success, flags)));
}
void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) {
UnicodeRangeSplitter* splitter,
JSRegExp::Flags flags) {
ZoneList<CharacterRange>* non_bmp =
ToCanonicalZoneList(splitter->non_bmp(), compiler->zone());
if (non_bmp == nullptr) return;
DCHECK(!compiler->one_byte());
Zone* zone = compiler->zone();
JSRegExp::Flags default_flags = JSRegExp::Flags();
CharacterRange::Canonicalize(non_bmp);
for (int i = 0; i < non_bmp->length(); i++) {
// Match surrogate pair.
......@@ -237,7 +236,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, to_t), compiler->read_backward(),
on_success, default_flags)));
on_success, flags)));
} else {
if (from_t != kTrailSurrogateStart) {
// Add [from_l][from_t-\udfff]
......@@ -245,7 +244,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, kTrailSurrogateEnd),
compiler->read_backward(), on_success, default_flags)));
compiler->read_backward(), on_success, flags)));
from_l++;
}
if (to_t != kTrailSurrogateEnd) {
......@@ -254,7 +253,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(to_l),
CharacterRange::Range(kTrailSurrogateStart, to_t),
compiler->read_backward(), on_success, default_flags)));
compiler->read_backward(), on_success, flags)));
to_l--;
}
if (from_l <= to_l) {
......@@ -263,7 +262,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Range(from_l, to_l),
CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
compiler->read_backward(), on_success, default_flags)));
compiler->read_backward(), on_success, flags)));
}
}
}
......@@ -302,8 +301,8 @@ RegExpNode* MatchAndNegativeLookaroundInReadDirection(
void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) {
JSRegExp::Flags default_flags = JSRegExp::Flags();
UnicodeRangeSplitter* splitter,
JSRegExp::Flags flags) {
ZoneList<CharacterRange>* lead_surrogates =
ToCanonicalZoneList(splitter->lead_surrogates(), compiler->zone());
if (lead_surrogates == nullptr) return;
......@@ -317,22 +316,20 @@ void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
// Reading backward. Assert that reading forward, there is no trail
// surrogate, and then backward match the lead surrogate.
match = NegativeLookaroundAgainstReadDirectionAndMatch(
compiler, trail_surrogates, lead_surrogates, on_success, true,
default_flags);
compiler, trail_surrogates, lead_surrogates, on_success, true, flags);
} else {
// Reading forward. Forward match the lead surrogate and assert that
// no trail surrogate follows.
match = MatchAndNegativeLookaroundInReadDirection(
compiler, lead_surrogates, trail_surrogates, on_success, false,
default_flags);
compiler, lead_surrogates, trail_surrogates, on_success, false, flags);
}
result->AddAlternative(GuardedAlternative(match));
}
void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) {
JSRegExp::Flags default_flags = JSRegExp::Flags();
UnicodeRangeSplitter* splitter,
JSRegExp::Flags flags) {
ZoneList<CharacterRange>* trail_surrogates =
ToCanonicalZoneList(splitter->trail_surrogates(), compiler->zone());
if (trail_surrogates == nullptr) return;
......@@ -346,14 +343,12 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
// Reading backward. Backward match the trail surrogate and assert that no
// lead surrogate precedes it.
match = MatchAndNegativeLookaroundInReadDirection(
compiler, trail_surrogates, lead_surrogates, on_success, true,
default_flags);
compiler, trail_surrogates, lead_surrogates, on_success, true, flags);
} else {
// Reading forward. Assert that reading backward, there is no lead
// surrogate, and then forward match the trail surrogate.
match = NegativeLookaroundAgainstReadDirectionAndMatch(
compiler, lead_surrogates, trail_surrogates, on_success, false,
default_flags);
compiler, lead_surrogates, trail_surrogates, on_success, false, flags);
}
result->AddAlternative(GuardedAlternative(match));
}
......@@ -436,10 +431,10 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
} else {
ChoiceNode* result = zone->New<ChoiceNode>(2, zone);
UnicodeRangeSplitter splitter(ranges);
AddBmpCharacters(compiler, result, on_success, &splitter);
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
AddBmpCharacters(compiler, result, on_success, &splitter, flags_);
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter, flags_);
AddLoneLeadSurrogates(compiler, result, on_success, &splitter, flags_);
AddLoneTrailSurrogates(compiler, result, on_success, &splitter, flags_);
static constexpr int kMaxRangesToInline = 32; // Arbitrary.
if (ranges->length() > kMaxRangesToInline) result->SetDoNotInline();
return result;
......
......@@ -2,6 +2,12 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// ASCII case folding with two-byte subject string.
assertTrue(/[a]/ui.test("\u{20a0}a"));
assertTrue(/[a]/ui.test("\u{20a0}A"));
assertTrue(/[A]/ui.test("\u{20a0}a"));
assertTrue(/[A]/ui.test("\u{20a0}A"));
// Non-unicode use toUpperCase mappings.
assertFalse(/[\u00e5]/i.test("\u212b"));
assertFalse(/[\u212b]/i.test("\u00e5\u1234"));
......@@ -29,24 +35,20 @@ assertFalse(/\u{10400}/ui.test("\u{10428}"));
assertFalse(/\ud801\udc00/ui.test("\u{10428}"));
assertFalse(/[\u{10428}]/ui.test("\u{10400}"));
assertFalse(/[\ud801\udc28]/ui.test("\u{10400}"));
assertEquals(["\uff21\u{10400}"],
/[\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc"));
// TODO(v8:10120): Investigate why these don't behave as expected.
{
// assertEquals(["\uff21\u{10400}"],
// /[\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc"));
assertEquals(["\u{10400}"],
/[\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc"));
}
{
// Should be:
// assertEquals(["abc"], /[^\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc\uff23"));
//
// But is:
assertEquals(["\u{ff21}"], /[^\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc\uff23"));
}
{
// assertEquals(["\uff53\u24bb"],
// /[\u24d5-\uff33]+/ui.exec("\uff54\uff53\u24bb\u24ba"));
assertEquals(null,
/[\u24d5-\uff33]+/ui.exec("\uff54\uff53\u24bb\u24ba"));
}
assertEquals(["\uff53\u24bb"],
/[\u24d5-\uff33]+/ui.exec("\uff54\uff53\u24bb\u24ba"));
// Full mappings are ignored.
assertFalse(/\u00df/ui.test("SS"));
......
......@@ -420,17 +420,18 @@
# noi18n build cannot parse characters in supplementary plane.
'harmony/regexp-named-captures': [FAIL],
# noi18n cannot turn on ICU backend for Date
'icu-date-to-string': [SKIP],
'icu-date-lord-howe': [SKIP],
'tzoffset-transition-apia': [SKIP],
'tzoffset-transition-lord-howe': [SKIP],
'tzoffset-transition-moscow': [SKIP],
'tzoffset-transition-new-york': [SKIP],
'tzoffset-seoul': [SKIP],
# noi18n cannot turn on ICU backend for Date. Anything goes as long as we
# don't crash.
'icu-date-to-string': [PASS,FAIL],
'icu-date-lord-howe': [PASS,FAIL],
'tzoffset-transition-apia': [PASS,FAIL],
'tzoffset-transition-lord-howe': [PASS,FAIL],
'tzoffset-transition-moscow': [PASS,FAIL],
'tzoffset-transition-new-york': [PASS,FAIL],
'tzoffset-seoul': [PASS,FAIL],
# noi18n is required for Intl
'regress/regress-crbug-1052647': [SKIP],
'regress/regress-crbug-1052647': [PASS,FAIL],
}], # 'no_i18n'
##############################################################################
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment