Commit 52b4fb00 authored by jgruber, committed by Commit Bot

[regexp] Preserve invariant of non-empty character class

The irregexp compiler expects RegExpCharacterClass instances to
contain at least one range. This preserves that invariant when parsing
the negated \P{Any} unicode property.

Bug: chromium:793588
Change-Id: If71fdce014a7e64d8af559084ee88108303d694b
Reviewed-on: https://chromium-review.googlesource.com/827010
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Reviewed-by: Erik Corry <erikcorry@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50146}
parent ffe7919f
......@@ -2577,6 +2577,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
details->positions(characters_filled_in);
RegExpCharacterClass* tree = elm.char_class();
ZoneList<CharacterRange>* ranges = tree->ranges(zone());
DCHECK(!ranges->is_empty());
if (tree->is_negated()) {
// A quick check uses multi-character mask and compare. There is no
// useful way to incorporate a negative char class into this scheme
......@@ -3273,9 +3274,9 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
JSRegExp::Flags flags) {
DCHECK_NOT_NULL(ranges);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
elms->Add(
TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, flags)),
zone);
elms->Add(TextElement::CharClass(
new (zone) RegExpCharacterClass(zone, ranges, flags)),
zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
......@@ -3288,10 +3289,10 @@ TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
elms->Add(TextElement::CharClass(
new (zone) RegExpCharacterClass(lead_ranges, flags)),
new (zone) RegExpCharacterClass(zone, lead_ranges, flags)),
zone);
elms->Add(TextElement::CharClass(
new (zone) RegExpCharacterClass(trail_ranges, flags)),
new (zone) RegExpCharacterClass(zone, trail_ranges, flags)),
zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
......@@ -5109,10 +5110,9 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
ranges = negated;
}
if (ranges->length() == 0) {
JSRegExp::Flags default_flags = JSRegExp::Flags();
ranges->Add(CharacterRange::Everything(), zone);
JSRegExp::Flags default_flags;
RegExpCharacterClass* fail =
new (zone) RegExpCharacterClass(ranges, default_flags, NEGATED);
new (zone) RegExpCharacterClass(zone, ranges, default_flags);
return new (zone) TextNode(fail, compiler->read_backward(), on_success);
}
if (standard_type() == '*') {
......@@ -5366,8 +5366,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (IsUnicode(flags) && contains_trail_surrogate) {
character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
}
alternatives->at(write_posn++) =
new (zone) RegExpCharacterClass(ranges, flags, character_class_flags);
alternatives->at(write_posn++) = new (zone)
RegExpCharacterClass(zone, ranges, flags, character_class_flags);
} else {
// Just copy any trivial alternatives.
for (int j = first_in_run; j < i; j++) {
......
......@@ -306,11 +306,17 @@ class RegExpCharacterClass final : public RegExpTree {
typedef base::Flags<Flag> CharacterClassFlags;
RegExpCharacterClass(
ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
CharacterClassFlags character_class_flags = CharacterClassFlags())
: set_(ranges),
flags_(flags),
character_class_flags_(character_class_flags) {}
character_class_flags_(character_class_flags) {
// Convert the empty set of ranges to the negated Everything() range.
if (ranges->is_empty()) {
ranges->Add(CharacterRange::Everything(), zone);
character_class_flags_ ^= NEGATED;
}
}
RegExpCharacterClass(uc16 type, JSRegExp::Flags flags)
: set_(type),
flags_(flags),
......@@ -352,7 +358,7 @@ class RegExpCharacterClass final : public RegExpTree {
private:
CharacterSet set_;
const JSRegExp::Flags flags_;
const CharacterClassFlags character_class_flags_;
CharacterClassFlags character_class_flags_;
};
......
......@@ -285,7 +285,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
}
RegExpCharacterClass* cc =
new (zone()) RegExpCharacterClass(ranges, builder->flags());
new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags());
builder->AddCharacterClass(cc);
break;
}
......@@ -332,8 +332,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
new (zone()) ZoneList<CharacterRange>(2, zone());
CharacterRange::AddClassEscape(
c, ranges, unicode() && builder->ignore_case(), zone());
RegExpCharacterClass* cc =
new (zone()) RegExpCharacterClass(ranges, builder->flags());
RegExpCharacterClass* cc = new (zone())
RegExpCharacterClass(zone(), ranges, builder->flags());
builder->AddCharacterClass(cc);
break;
}
......@@ -348,8 +348,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
if (!ParsePropertyClass(ranges, p == 'P')) {
return ReportError(CStrVector("Invalid property name"));
}
RegExpCharacterClass* cc =
new (zone()) RegExpCharacterClass(ranges, builder->flags());
RegExpCharacterClass* cc = new (zone())
RegExpCharacterClass(zone(), ranges, builder->flags());
builder->AddCharacterClass(cc);
} else {
// With /u, no identity escapes except for syntax characters
......@@ -1255,7 +1255,12 @@ bool LookupSpecialPropertyValueName(const char* name,
ZoneList<CharacterRange>* result,
bool negate, Zone* zone) {
if (NameEquals(name, "Any")) {
if (!negate) result->Add(CharacterRange::Everything(), zone);
if (negate) {
// Leave the list of character ranges empty, since the negation of 'Any'
// is the empty set.
} else {
result->Add(CharacterRange::Everything(), zone);
}
} else if (NameEquals(name, "ASCII")) {
result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint)
: CharacterRange::Range(0x0, 0x7F),
......@@ -1672,14 +1677,10 @@ RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
return ReportError(CStrVector(kUnterminated));
}
Advance();
if (ranges->length() == 0) {
ranges->Add(CharacterRange::Everything(), zone());
is_negated = !is_negated;
}
RegExpCharacterClass::CharacterClassFlags character_class_flags;
if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED;
return new (zone())
RegExpCharacterClass(ranges, builder->flags(), character_class_flags);
return new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags(),
character_class_flags);
}
......@@ -1853,7 +1854,8 @@ void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {
AddTerm(new (zone()) RegExpCharacterClass(
CharacterRange::List(zone(), CharacterRange::Singleton(c)), flags_));
zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c)),
flags_));
}
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-property
// Regression checks for \P{Any} (the negation of \p{Any}, i.e. the empty set)
// under the /u flag. The subject "a\u{d83d}a" places a lone lead surrogate
// between the two "a"s.

// \P{Any} matches nothing, so the whole pattern must fail.
assertNull(/a\P{Any}a/u.exec("a\u{d83d}a"));
// \p{Any} matches every code point, including the lone surrogate.
assertEquals(["a\u{d83d}a"], /a\p{Any}a/u.exec("a\u{d83d}a"));
// The first alternative can never match; the second one must still succeed.
assertEquals(["a\u{d83d}a"], /(?:a\P{Any}a|a\p{Any}a)/u.exec("a\u{d83d}a"));
// The same empty set, this time nested inside a character class.
assertNull(/a[\P{Any}]a/u.exec("a\u{d83d}a"));
// Negating a class that contains only the empty set matches everything.
assertEquals(["a\u{d83d}a"], /a[^\P{Any}]a/u.exec("a\u{d83d}a"));
// [^\P{Any}x] excludes only "x": it accepts the lone surrogate...
assertEquals(["a\u{d83d}a"], /a[^\P{Any}x]a/u.exec("a\u{d83d}a"));
// ...and rejects "x" itself.
assertNull(/a[^\P{Any}x]a/u.exec("axa"));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment