Commit 7924985f, authored by Yang Guo, committed by Commit Bot

[regexp] Throw for patterns like /[\p{...}-\p{...}]/u.

Bug: v8:4743
Change-Id: Iacb7681e679faa1ece77c577a2585363f6ef87a2
Reviewed-on: https://chromium-review.googlesource.com/582010
Commit-Queue: Yang Guo <yangguo@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#46857}
parent dc778a3d
...@@ -5843,7 +5843,7 @@ static void AddClassNegated(const int *elmv, ...@@ -5843,7 +5843,7 @@ static void AddClassNegated(const int *elmv,
ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone); ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone);
} }
void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
bool add_unicode_case_equivalents, bool add_unicode_case_equivalents,
Zone* zone) { Zone* zone) {
if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) { if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {
...@@ -5866,7 +5866,7 @@ void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, ...@@ -5866,7 +5866,7 @@ void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
AddClassEscape(type, ranges, zone); AddClassEscape(type, ranges, zone);
} }
void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
Zone* zone) { Zone* zone) {
switch (type) { switch (type) {
case 's': case 's':
......
...@@ -80,10 +80,10 @@ class CharacterRange { ...@@ -80,10 +80,10 @@ class CharacterRange {
CharacterRange() : from_(0), to_(0) {} CharacterRange() : from_(0), to_(0) {}
// For compatibility with the CHECK_OK macro // For compatibility with the CHECK_OK macro
CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, static void AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
Zone* zone); Zone* zone);
// Add class escapes. Add case equivalent closure for \w and \W if necessary. // Add class escapes. Add case equivalent closure for \w and \W if necessary.
static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, static void AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
bool add_unicode_case_equivalents, Zone* zone); bool add_unicode_case_equivalents, Zone* zone);
static Vector<const int> GetWordBounds(); static Vector<const int> GetWordBounds();
static inline CharacterRange Singleton(uc32 value) { static inline CharacterRange Singleton(uc32 value) {
......
...@@ -1476,11 +1476,12 @@ uc32 RegExpParser::ParseClassCharacterEscape() { ...@@ -1476,11 +1476,12 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
return 0; return 0;
} }
void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { Zone* zone,
DCHECK_EQ(0, *char_class); bool add_unicode_case_equivalents,
uc32 first = current(); uc32* char_out, bool* is_class_escape) {
if (first == '\\') { uc32 current_char = current();
if (current_char == '\\') {
switch (Next()) { switch (Next()) {
case 'w': case 'w':
case 'W': case 'W':
...@@ -1488,57 +1489,37 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { ...@@ -1488,57 +1489,37 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
case 'D': case 'D':
case 's': case 's':
case 'S': { case 'S': {
*char_class = Next(); CharacterRange::AddClassEscape(static_cast<char>(Next()), ranges,
add_unicode_case_equivalents, zone);
Advance(2); Advance(2);
return CharacterRange::Singleton(0); // Return dummy value. *is_class_escape = true;
return;
} }
case kEndMarker: case kEndMarker:
return ReportError(CStrVector("\\ at end of pattern")); ReportError(CStrVector("\\ at end of pattern"));
return;
case 'p':
case 'P':
if (FLAG_harmony_regexp_property && unicode()) {
bool negate = Next() == 'P';
Advance(2);
if (!ParsePropertyClass(ranges, negate)) {
ReportError(CStrVector("Invalid property name in character class"));
}
*is_class_escape = true;
return;
}
break;
default: default:
first = ParseClassCharacterEscape(CHECK_FAILED); break;
} }
*char_out = ParseClassCharacterEscape();
*is_class_escape = false;
} else { } else {
Advance(); Advance();
*char_out = current_char;
*is_class_escape = false;
} }
return CharacterRange::Singleton(first);
}
static const uc16 kNoCharClass = 0;
// Adds range or pre-defined character class to character ranges.
// If char_class is not kInvalidClass, it's interpreted as a class
// escape (i.e., 's' means whitespace, from '\s').
static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
uc16 char_class, CharacterRange range,
bool add_unicode_case_equivalents,
Zone* zone) {
if (char_class != kNoCharClass) {
CharacterRange::AddClassEscape(char_class, ranges,
add_unicode_case_equivalents, zone);
} else {
ranges->Add(range, zone);
}
}
bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) {
if (!FLAG_harmony_regexp_property) return false;
if (!unicode()) return false;
if (current() != '\\') return false;
uc32 next = Next();
bool parse_success = false;
if (next == 'p') {
Advance(2);
parse_success = ParsePropertyClass(ranges, false);
} else if (next == 'P') {
Advance(2);
parse_success = ParsePropertyClass(ranges, true);
} else {
return false;
}
if (!parse_success)
ReportError(CStrVector("Invalid property name in character class"));
return parse_success;
} }
RegExpTree* RegExpParser::ParseCharacterClass() { RegExpTree* RegExpParser::ParseCharacterClass() {
...@@ -1557,10 +1538,10 @@ RegExpTree* RegExpParser::ParseCharacterClass() { ...@@ -1557,10 +1538,10 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
new (zone()) ZoneList<CharacterRange>(2, zone()); new (zone()) ZoneList<CharacterRange>(2, zone());
bool add_unicode_case_equivalents = unicode() && ignore_case(); bool add_unicode_case_equivalents = unicode() && ignore_case();
while (has_more() && current() != ']') { while (has_more() && current() != ']') {
bool parsed_property = ParseClassProperty(ranges CHECK_FAILED); uc32 char_1, char_2;
if (parsed_property) continue; bool is_class_1, is_class_2;
uc16 char_class = kNoCharClass; ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_1,
CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); &is_class_1 CHECK_FAILED);
if (current() == '-') { if (current() == '-') {
Advance(); Advance();
if (current() == kEndMarker) { if (current() == kEndMarker) {
...@@ -1568,34 +1549,30 @@ RegExpTree* RegExpParser::ParseCharacterClass() { ...@@ -1568,34 +1549,30 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
// following code report an error. // following code report an error.
break; break;
} else if (current() == ']') { } else if (current() == ']') {
AddRangeOrEscape(ranges, char_class, first, if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
add_unicode_case_equivalents, zone());
ranges->Add(CharacterRange::Singleton('-'), zone()); ranges->Add(CharacterRange::Singleton('-'), zone());
break; break;
} }
uc16 char_class_2 = kNoCharClass; ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_2,
CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); &is_class_2 CHECK_FAILED);
if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { if (is_class_1 || is_class_2) {
// Either end is an escaped character class. Treat the '-' verbatim. // Either end is an escaped character class. Treat the '-' verbatim.
if (unicode()) { if (unicode()) {
// ES2015 21.2.2.15.1 step 1. // ES2015 21.2.2.15.1 step 1.
return ReportError(CStrVector(kRangeInvalid)); return ReportError(CStrVector(kRangeInvalid));
} }
AddRangeOrEscape(ranges, char_class, first, if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
add_unicode_case_equivalents, zone());
ranges->Add(CharacterRange::Singleton('-'), zone()); ranges->Add(CharacterRange::Singleton('-'), zone());
AddRangeOrEscape(ranges, char_class_2, next, if (!is_class_2) ranges->Add(CharacterRange::Singleton(char_2), zone());
add_unicode_case_equivalents, zone());
continue; continue;
} }
// ES2015 21.2.2.15.1 step 6. // ES2015 21.2.2.15.1 step 6.
if (first.from() > next.to()) { if (char_1 > char_2) {
return ReportError(CStrVector(kRangeOutOfOrder)); return ReportError(CStrVector(kRangeOutOfOrder));
} }
ranges->Add(CharacterRange::Range(first.from(), next.to()), zone()); ranges->Add(CharacterRange::Range(char_1, char_2), zone());
} else { } else {
AddRangeOrEscape(ranges, char_class, first, add_unicode_case_equivalents, if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
zone());
} }
} }
if (!has_more()) { if (!has_more()) {
......
...@@ -184,8 +184,14 @@ class RegExpParser BASE_EMBEDDED { ...@@ -184,8 +184,14 @@ class RegExpParser BASE_EMBEDDED {
// can be reparsed. // can be reparsed.
bool ParseBackReferenceIndex(int* index_out); bool ParseBackReferenceIndex(int* index_out);
bool ParseClassProperty(ZoneList<CharacterRange>* result); // Parse inside a class. Either add escaped class to the range, or return
CharacterRange ParseClassAtom(uc16* char_class); // false and pass parsed single character through |char_out|.
void ParseClassEscape(ZoneList<CharacterRange>* ranges, Zone* zone,
bool add_unicode_case_equivalents, uc32* char_out,
bool* is_class_escape);
char ParseClassEscape();
RegExpTree* ReportError(Vector<const char> message); RegExpTree* ReportError(Vector<const char> message);
void Advance(); void Advance();
void Advance(int dist); void Advance(int dist);
......
...@@ -222,8 +222,8 @@ void TestRegExpParser(bool lookbehind) { ...@@ -222,8 +222,8 @@ void TestRegExpParser(bool lookbehind) {
CheckParseEq("[\\d]", "[0-9]"); CheckParseEq("[\\d]", "[0-9]");
CheckParseEq("[x\\dz]", "[x 0-9 z]"); CheckParseEq("[x\\dz]", "[x 0-9 z]");
CheckParseEq("[\\d-z]", "[0-9 - z]"); CheckParseEq("[\\d-z]", "[0-9 - z]");
CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]"); CheckParseEq("[\\d-\\d]", "[0-9 0-9 -]");
CheckParseEq("[z-\\d]", "[z - 0-9]"); CheckParseEq("[z-\\d]", "[0-9 z -]");
// Control character outside character class. // Control character outside character class.
CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'"); CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
CheckParseEq("\\c!", "'\\c!'"); CheckParseEq("\\c!", "'\\c!'");
......
...@@ -9,9 +9,10 @@ assertThrows("/[\\p{garbage}]/u"); ...@@ -9,9 +9,10 @@ assertThrows("/[\\p{garbage}]/u");
assertThrows("/[\\p{}]/u"); assertThrows("/[\\p{}]/u");
assertThrows("/[\\p{]/u"); assertThrows("/[\\p{]/u");
assertThrows("/[\\p}]/u"); assertThrows("/[\\p}]/u");
assertThrows("/^[\\p{Lu}-\\p{Ll}]+$/u");
assertTrue(/^[\p{Lu}\p{Ll}]+$/u.test("ABCabc")); assertTrue(/^[\p{Lu}\p{Ll}]+$/u.test("ABCabc"));
assertTrue(/^[\p{Lu}-\p{Ll}]+$/u.test("ABC-abc")); assertTrue(/^[\p{Lu}-]+$/u.test("ABC-"));
assertFalse(/^[\P{Lu}\p{Ll}]+$/u.test("ABCabc")); assertFalse(/^[\P{Lu}\p{Ll}]+$/u.test("ABCabc"));
assertTrue(/^[\P{Lu}\p{Ll}]+$/u.test("abc")); assertTrue(/^[\P{Lu}\p{Ll}]+$/u.test("abc"));
assertTrue(/^[\P{Lu}]+$/u.test("abc123")); assertTrue(/^[\P{Lu}]+$/u.test("abc123"));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment