Commit 6691d531 authored by lrn@chromium.org's avatar lrn@chromium.org

Revert 5911 (RegExp fail on invalid range syntax).

Review URL: http://codereview.chromium.org/5703001

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5953 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 649a4710
...@@ -4452,7 +4452,6 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { ...@@ -4452,7 +4452,6 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
RegExpTree* RegExpParser::ParseCharacterClass() { RegExpTree* RegExpParser::ParseCharacterClass() {
static const char* kUnterminated = "Unterminated character class"; static const char* kUnterminated = "Unterminated character class";
static const char* kRangeOutOfOrder = "Range out of order in character class"; static const char* kRangeOutOfOrder = "Range out of order in character class";
static const char* kInvalidRange = "Invalid character range";
ASSERT_EQ(current(), '['); ASSERT_EQ(current(), '[');
Advance(); Advance();
...@@ -4461,28 +4460,12 @@ RegExpTree* RegExpParser::ParseCharacterClass() { ...@@ -4461,28 +4460,12 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
is_negated = true; is_negated = true;
Advance(); Advance();
} }
// A CharacterClass is a sequence of single characters, character class
// escapes or ranges. Ranges are on the form "x-y" where x and y are
// single characters (and not character class escapes like \s).
// A "-" may occur at the start or end of the character class (just after
// "[" or "[^", or just before "]") without being considered part of a
// range. A "-" may also appear as the beginning or end of a range.
// I.e., [--+] is valid, so is [!--].
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
while (has_more() && current() != ']') { while (has_more() && current() != ']') {
uc16 char_class = 0; uc16 char_class = 0;
CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
if (char_class) { if (char_class) {
CharacterRange::AddClassEscape(char_class, ranges); CharacterRange::AddClassEscape(char_class, ranges);
if (current() == '-') {
Advance();
ranges->Add(CharacterRange::Singleton('-'));
if (current() != ']') {
ReportError(CStrVector(kInvalidRange) CHECK_FAILED);
}
break;
}
continue; continue;
} }
if (current() == '-') { if (current() == '-') {
...@@ -4498,7 +4481,10 @@ RegExpTree* RegExpParser::ParseCharacterClass() { ...@@ -4498,7 +4481,10 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
} }
CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED); CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED);
if (char_class) { if (char_class) {
ReportError(CStrVector(kInvalidRange) CHECK_FAILED); ranges->Add(first);
ranges->Add(CharacterRange::Singleton('-'));
CharacterRange::AddClassEscape(char_class, ranges);
continue;
} }
if (first.from() > next.to()) { if (first.from() > next.to()) {
return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);
......
...@@ -173,6 +173,9 @@ TEST(Parser) { ...@@ -173,6 +173,9 @@ TEST(Parser) {
CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]"); CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
CHECK_PARSE_EQ("[\\d]", "[0-9]"); CHECK_PARSE_EQ("[\\d]", "[0-9]");
CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]"); CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");
CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
"'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'"); "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
CHECK_PARSE_EQ("\\c!", "'c!'"); CHECK_PARSE_EQ("\\c!", "'c!'");
......
...@@ -110,6 +110,44 @@ assertFalse(re.test("\\]")); ...@@ -110,6 +110,44 @@ assertFalse(re.test("\\]"));
assertFalse(re.test("\x03]")); // I.e., read as \cc assertFalse(re.test("\x03]")); // I.e., read as \cc
// Test that we handle \s and \S correctly inside some bizarre
// character classes.
re = /[\s-:]/;
assertTrue(re.test('-'));
assertTrue(re.test(':'));
assertTrue(re.test(' '));
assertTrue(re.test('\t'));
assertTrue(re.test('\n'));
assertFalse(re.test('a'));
assertFalse(re.test('Z'));
re = /[\S-:]/;
assertTrue(re.test('-'));
assertTrue(re.test(':'));
assertFalse(re.test(' '));
assertFalse(re.test('\t'));
assertFalse(re.test('\n'));
assertTrue(re.test('a'));
assertTrue(re.test('Z'));
re = /[^\s-:]/;
assertFalse(re.test('-'));
assertFalse(re.test(':'));
assertFalse(re.test(' '));
assertFalse(re.test('\t'));
assertFalse(re.test('\n'));
assertTrue(re.test('a'));
assertTrue(re.test('Z'));
re = /[^\S-:]/;
assertFalse(re.test('-'));
assertFalse(re.test(':'));
assertTrue(re.test(' '));
assertTrue(re.test('\t'));
assertTrue(re.test('\n'));
assertFalse(re.test('a'));
assertFalse(re.test('Z'));
re = /[\s]/; re = /[\s]/;
assertFalse(re.test('-')); assertFalse(re.test('-'));
assertFalse(re.test(':')); assertFalse(re.test(':'));
...@@ -609,47 +647,3 @@ assertEquals(4, re.exec("zimzamzumba").index); ...@@ -609,47 +647,3 @@ assertEquals(4, re.exec("zimzamzumba").index);
assertEquals(["bc"], re.exec("zimzomzumbc")); assertEquals(["bc"], re.exec("zimzomzumbc"));
assertFalse(re.test("c")); assertFalse(re.test("c"));
assertFalse(re.test("")); assertFalse(re.test(""));
function testInvalidRange(str) {
try {
RegExp(str).test("x");
} catch (e) {
return;
}
assetUnreachable("Allowed invalid range in " + str);
}
function testValidRange(str) {
try {
RegExp(str).test("x");
} catch (e) {
assertUnreachable("Shouldn't fail parsing: " + str + ", was: " + e);
}
}
testInvalidRange("[\\d-z]");
testInvalidRange("[z-\\d]");
testInvalidRange("[\\d-\\d]");
testInvalidRange("[z-x]"); // Larger value first.
testInvalidRange("[x-\\d-\\d]");
testValidRange("[x-z]");
testValidRange("[!--\d]"); // Second "-" is end of range.
testValidRange("[\d-]");
testValidRange("[-\d]");
testValidRange("[-\d-]");
testValidRange("[^-\d-]");
testValidRange("[^-\d-]");
testValidRange("[0-9-\w]");
// Escaped dashes do not count as range operators.
testValidRange("[\\d\\-z]");
testValidRange("[z\\-\\d]");
testValidRange("[\\d\\-\\d]");
testValidRange("[z\\-x]");
testValidRange("[x\\-\\d\\-\\d]");
...@@ -962,7 +962,7 @@ res[882] = /[az-]+/; ...@@ -962,7 +962,7 @@ res[882] = /[az-]+/;
res[883] = /[a\-z]+/; res[883] = /[a\-z]+/;
res[884] = /[a-z]+/; res[884] = /[a-z]+/;
res[885] = /[\d-]+/; res[885] = /[\d-]+/;
// res[886] - Disabled after making [\d-z] invalid to be compatible with JSC. res[886] = /[\d-z]+/;
res[887] = /\x5c/; res[887] = /\x5c/;
res[888] = /\x20Z/; res[888] = /\x20Z/;
res[889] = /ab{3cd/; res[889] = /ab{3cd/;
...@@ -1346,7 +1346,7 @@ res[1266] = /((Z)+|A)*/; ...@@ -1346,7 +1346,7 @@ res[1266] = /((Z)+|A)*/;
res[1267] = /(Z()|A)*/; res[1267] = /(Z()|A)*/;
res[1268] = /(Z(())|A)*/; res[1268] = /(Z(())|A)*/;
res[1269] = /a*/g; res[1269] = /a*/g;
// res[1270] disabled after making /^[\d-a]/ invalid to be compatible with JSC. res[1270] = /^[\d-a]/;
res[1271] = /[[:space:]]+/; res[1271] = /[[:space:]]+/;
res[1272] = /[[:blank:]]+/; res[1272] = /[[:blank:]]+/;
res[1273] = /[\s]+/; res[1273] = /[\s]+/;
...@@ -2530,7 +2530,7 @@ assertEquals(null, res[431].exec("a\x0db ", 882)); ...@@ -2530,7 +2530,7 @@ assertEquals(null, res[431].exec("a\x0db ", 882));
assertEquals(null, res[431].exec("a\x85b", 883)); assertEquals(null, res[431].exec("a\x85b", 883));
assertThrows("var re = /(?-+a)/;", 884); assertThrows("var re = /(?-+a)/;", 884);
assertEquals(null, res[443].exec("aaaa", 885)); assertEquals(null, res[443].exec("aaaa", 885));
// assertEquals(null, res[443].exec("bacxxx", 886)); assertEquals(null, res[443].exec("bacxxx", 886));
assertEquals(null, res[443].exec("bbaccxxx ", 887)); assertEquals(null, res[443].exec("bbaccxxx ", 887));
assertEquals(null, res[443].exec("bbbacccxx", 888)); assertEquals(null, res[443].exec("bbbacccxx", 888));
assertEquals(null, res[443].exec("aaaa", 889)); assertEquals(null, res[443].exec("aaaa", 889));
...@@ -4391,10 +4391,9 @@ assertEquals("abcdxyz", res[884].exec("abcdxyz"), 2743); ...@@ -4391,10 +4391,9 @@ assertEquals("abcdxyz", res[884].exec("abcdxyz"), 2743);
assertEquals("12-34", res[885].exec("12-34"), 2744); assertEquals("12-34", res[885].exec("12-34"), 2744);
assertEquals(null, res[885].exec("*** Failers", 2745)); assertEquals(null, res[885].exec("*** Failers", 2745));
assertEquals(null, res[885].exec("aaa", 2746)); assertEquals(null, res[885].exec("aaa", 2746));
// Disabled. To be compatible with JSC, the regexp is no longer valid. assertEquals("12-34z", res[886].exec("12-34z"), 2747);
// assertEquals("12-34z", res[886].exec("12-34z"), 2747); assertEquals(null, res[886].exec("*** Failers", 2748));
// assertEquals(null, res[886].exec("*** Failers", 2748)); assertEquals(null, res[886].exec("aaa", 2749));
// assertEquals(null, res[886].exec("aaa", 2749));
assertEquals("\\", res[887].exec("\\\\"), 2750); assertEquals("\\", res[887].exec("\\\\"), 2750);
assertEquals(" Z", res[888].exec("the Zoo"), 2751); assertEquals(" Z", res[888].exec("the Zoo"), 2751);
assertEquals(null, res[888].exec("*** Failers", 2752)); assertEquals(null, res[888].exec("*** Failers", 2752));
...@@ -5356,12 +5355,11 @@ assertEquals("", res[1269].exec("-things"), 3707); ...@@ -5356,12 +5355,11 @@ assertEquals("", res[1269].exec("-things"), 3707);
assertEquals("", res[1269].exec("0digit"), 3708); assertEquals("", res[1269].exec("0digit"), 3708);
assertEquals("", res[1269].exec("*** Failers"), 3709); assertEquals("", res[1269].exec("*** Failers"), 3709);
assertEquals("", res[1269].exec("bcdef "), 3710); assertEquals("", res[1269].exec("bcdef "), 3710);
// Disabled. To be compatible with JSC, the RegExp is no longer valid. assertEquals("a", res[1270].exec("abcde"), 3711);
// assertEquals("a", res[1270].exec("abcde"), 3711); assertEquals("-", res[1270].exec("-things"), 3712);
// assertEquals("-", res[1270].exec("-things"), 3712); assertEquals("0", res[1270].exec("0digit"), 3713);
// assertEquals("0", res[1270].exec("0digit"), 3713); assertEquals(null, res[1270].exec("*** Failers", 3714));
// assertEquals(null, res[1270].exec("*** Failers", 3714)); assertEquals(null, res[1270].exec("bcdef ", 3715));
// assertEquals(null, res[1270].exec("bcdef ", 3715));
assertEquals(null, res[1271].exec("> \x09\n\x0c\x0d\x0b<", 3716)); assertEquals(null, res[1271].exec("> \x09\n\x0c\x0d\x0b<", 3716));
assertEquals(null, res[1271].exec(" ", 3717)); assertEquals(null, res[1271].exec(" ", 3717));
assertEquals(null, res[1272].exec("> \x09\n\x0c\x0d\x0b<", 3718)); assertEquals(null, res[1272].exec("> \x09\n\x0c\x0d\x0b<", 3718));
......
...@@ -292,10 +292,6 @@ js1_2/regexp/RegExp_multiline_as_array: FAIL_OK ...@@ -292,10 +292,6 @@ js1_2/regexp/RegExp_multiline_as_array: FAIL_OK
js1_2/regexp/beginLine: FAIL_OK js1_2/regexp/beginLine: FAIL_OK
js1_2/regexp/endLine: FAIL_OK js1_2/regexp/endLine: FAIL_OK
# To be compatible with JSC, we no longer accept [\d-x], [x-\d] or
# [\d-\d] as valid ranges.
ecma_3/RegExp/regress-375715-02: FAIL
js1_5/extensions/regress-351463-01: FAIL
# To be compatible with safari typeof a regexp yields 'function'; # To be compatible with safari typeof a regexp yields 'function';
# in firefox it yields 'object'. # in firefox it yields 'object'.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment