Commit 94bb378e authored by lrn@chromium.org's avatar lrn@chromium.org

Make RegExp character class match JSC.

See http://trac.webkit.org/changeset/73594

Review URL: http://codereview.chromium.org/5723002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5974 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 72f5bcb3
......@@ -4449,6 +4449,22 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
}
static const uc16 kNoCharClass = 0;
// Adds range or pre-defined character class to character ranges.
// If char_class is not kInvalidClass, it's interpreted as a class
// escape (i.e., 's' means whitespace, from '\s').
static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
uc16 char_class,
CharacterRange range) {
if (char_class != kNoCharClass) {
CharacterRange::AddClassEscape(char_class, ranges);
} else {
ranges->Add(range);
}
}
RegExpTree* RegExpParser::ParseCharacterClass() {
static const char* kUnterminated = "Unterminated character class";
static const char* kRangeOutOfOrder = "Range out of order in character class";
......@@ -4462,12 +4478,8 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
}
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
while (has_more() && current() != ']') {
uc16 char_class = 0;
uc16 char_class = kNoCharClass;
CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
if (char_class) {
CharacterRange::AddClassEscape(char_class, ranges);
continue;
}
if (current() == '-') {
Advance();
if (current() == kEndMarker) {
......@@ -4475,15 +4487,17 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
// following code report an error.
break;
} else if (current() == ']') {
ranges->Add(first);
AddRangeOrEscape(ranges, char_class, first);
ranges->Add(CharacterRange::Singleton('-'));
break;
}
CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED);
if (char_class) {
ranges->Add(first);
uc16 char_class_2 = kNoCharClass;
CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
// Either end is an escaped character class. Treat the '-' verbatim.
AddRangeOrEscape(ranges, char_class, first);
ranges->Add(CharacterRange::Singleton('-'));
CharacterRange::AddClassEscape(char_class, ranges);
AddRangeOrEscape(ranges, char_class_2, next);
continue;
}
if (first.from() > next.to()) {
......@@ -4491,7 +4505,7 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
}
ranges->Add(CharacterRange::Range(first.from(), next.to()));
} else {
ranges->Add(first);
AddRangeOrEscape(ranges, char_class, first);
}
}
if (!has_more()) {
......
......@@ -202,6 +202,17 @@ assertFalse(re.test('\n'));
assertFalse(re.test('a'));
assertFalse(re.test('Z'));
// First - is treated as range operator, second as literal minus.
// This follows the specification in parsing, but doesn't throw on
// the \s at the beginning of the range.
re = /[\s-0-9]/;
assertTrue(re.test(' '));
assertTrue(re.test('\xA0'));
assertTrue(re.test('-'));
assertTrue(re.test('0'));
assertTrue(re.test('9'));
assertFalse(re.test('1'));
// Test beginning and end of line assertions with or without the
// multiline flag.
re = /^\d+/;
......@@ -647,3 +658,4 @@ assertEquals(4, re.exec("zimzamzumba").index);
assertEquals(["bc"], re.exec("zimzomzumbc"));
assertFalse(re.test("c"));
assertFalse(re.test(""));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment