Commit 17b9d872 authored by Patrick Thier's avatar Patrick Thier Committed by Commit Bot

[regexp] Add missing case for EscapeRegExpPattern

EscapeRegExpPattern should return a string representation of a
RegExp instance that in turn can be used to construct a new
RegExp instance with the same internal state as the original one.

Previous versions incorrectly escaped '/' also inside character classes
(e.g. /[/]/ returned "[\/]").

This patch properly escapes '/' when necessary and omits unnecessary
escapes.

Bug: v8:8615, v8:1982, v8:9446
Change-Id: I4ecb993dc69d6976f4637cedf43465cd0c32e427
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1688050
Commit-Queue: Patrick Thier <pthier@google.com>
Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarMathias Bynens <mathias@chromium.org>
Reviewed-by: 's avatarPeter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62587}
parent a6eabacf
...@@ -6140,15 +6140,20 @@ template <typename Char> ...@@ -6140,15 +6140,20 @@ template <typename Char>
int CountRequiredEscapes(Handle<String> source) { int CountRequiredEscapes(Handle<String> source) {
DisallowHeapAllocation no_gc; DisallowHeapAllocation no_gc;
int escapes = 0; int escapes = 0;
bool in_char_class = false;
Vector<const Char> src = source->GetCharVector<Char>(no_gc); Vector<const Char> src = source->GetCharVector<Char>(no_gc);
for (int i = 0; i < src.length(); i++) { for (int i = 0; i < src.length(); i++) {
const Char c = src[i]; const Char c = src[i];
if (c == '\\') { if (c == '\\') {
// Escape. Skip next character; // Escape. Skip next character;
i++; i++;
} else if (c == '/') { } else if (c == '/' && !in_char_class) {
// Not escaped forward-slash needs escape. // Not escaped forward-slash needs escape.
escapes++; escapes++;
} else if (c == '[') {
in_char_class = true;
} else if (c == ']') {
in_char_class = false;
} else if (c == '\n') { } else if (c == '\n') {
escapes++; escapes++;
} else if (c == '\r') { } else if (c == '\r') {
...@@ -6161,6 +6166,7 @@ int CountRequiredEscapes(Handle<String> source) { ...@@ -6161,6 +6166,7 @@ int CountRequiredEscapes(Handle<String> source) {
DCHECK(!unibrow::IsLineTerminator(static_cast<unibrow::uchar>(c))); DCHECK(!unibrow::IsLineTerminator(static_cast<unibrow::uchar>(c)));
} }
} }
DCHECK(!in_char_class);
return escapes; return escapes;
} }
...@@ -6178,16 +6184,19 @@ Handle<StringType> WriteEscapedRegExpSource(Handle<String> source, ...@@ -6178,16 +6184,19 @@ Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
Vector<Char> dst(result->GetChars(no_gc), result->length()); Vector<Char> dst(result->GetChars(no_gc), result->length());
int s = 0; int s = 0;
int d = 0; int d = 0;
// TODO(v8:1982): Fully implement bool in_char_class = false;
// https://tc39.github.io/ecma262/#sec-escaperegexppattern
while (s < src.length()) { while (s < src.length()) {
if (src[s] == '\\') { if (src[s] == '\\') {
// Escape. Copy this and next character. // Escape. Copy this and next character.
dst[d++] = src[s++]; dst[d++] = src[s++];
if (s == src.length()) break; if (s == src.length()) break;
} else if (src[s] == '/') { } else if (src[s] == '/' && !in_char_class) {
// Not escaped forward-slash needs escape. // Not escaped forward-slash needs escape.
dst[d++] = '\\'; dst[d++] = '\\';
} else if (src[s] == '[') {
in_char_class = true;
} else if (src[s] == ']') {
in_char_class = false;
} else if (src[s] == '\n') { } else if (src[s] == '\n') {
WriteStringToCharVector(dst, &d, "\\n"); WriteStringToCharVector(dst, &d, "\\n");
s++; s++;
...@@ -6208,6 +6217,7 @@ Handle<StringType> WriteEscapedRegExpSource(Handle<String> source, ...@@ -6208,6 +6217,7 @@ Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
dst[d++] = src[s++]; dst[d++] = src[s++];
} }
DCHECK_EQ(result->length(), d); DCHECK_EQ(result->length(), d);
DCHECK(!in_char_class);
return result; return result;
} }
...@@ -6264,13 +6274,13 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp, ...@@ -6264,13 +6274,13 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
source = String::Flatten(isolate, source); source = String::Flatten(isolate, source);
RETURN_ON_EXCEPTION(isolate, RegExp::Compile(isolate, regexp, source, flags),
JSRegExp);
Handle<String> escaped_source; Handle<String> escaped_source;
ASSIGN_RETURN_ON_EXCEPTION(isolate, escaped_source, ASSIGN_RETURN_ON_EXCEPTION(isolate, escaped_source,
EscapeRegExpSource(isolate, source), JSRegExp); EscapeRegExpSource(isolate, source), JSRegExp);
RETURN_ON_EXCEPTION(isolate, RegExp::Compile(isolate, regexp, source, flags),
JSRegExp);
regexp->set_source(*escaped_source); regexp->set_source(*escaped_source);
regexp->set_flags(Smi::FromInt(flags)); regexp->set_flags(Smi::FromInt(flags));
......
...@@ -824,3 +824,14 @@ assertEquals("\\u2028", new RegExp("\\u2028").source); ...@@ -824,3 +824,14 @@ assertEquals("\\u2028", new RegExp("\\u2028").source);
assertEquals("\\u2029", /\u2029/.source); assertEquals("\\u2029", /\u2029/.source);
assertEquals("\\u2029", new RegExp("\u2029").source); assertEquals("\\u2029", new RegExp("\u2029").source);
assertEquals("\\u2029", new RegExp("\\u2029").source); assertEquals("\\u2029", new RegExp("\\u2029").source);
assertEquals("[/]", /[/]/.source);
assertEquals("[\\/]", /[\/]/.source);
assertEquals("[\\\\/]", /[\\/]/.source);
assertEquals("[/]", new RegExp("[/]").source);
assertEquals("[/]", new RegExp("[\/]").source);
assertEquals("[\\/]", new RegExp("[\\/]").source);
assertEquals("[[/]", /[[/]/.source);
assertEquals("[/]]", /[/]]/.source);
assertEquals("[[/]]", /[[/]]/.source);
assertEquals("[[\\/]", /[[\/]/.source);
assertEquals("[[\\/]]", /[[\/]]/.source);
...@@ -126,6 +126,10 @@ ...@@ -126,6 +126,10 @@
# no_i18n build has an old ICU data and Georgian is treated as unicameral. # no_i18n build has an old ICU data and Georgian is treated as unicameral.
'ecma/String/15.5.4.12-3': [FAIL, ['no_i18n == True', PASS]], 'ecma/String/15.5.4.12-3': [FAIL, ['no_i18n == True', PASS]],
# Expecations in this test don't match the spec.
# See also https://bugs.chromium.org/p/v8/issues/detail?id=9446
'js1_5/Regress/regress-248444': [FAIL],
##################### SKIPPED TESTS ##################### ##################### SKIPPED TESTS #####################
# This test checks that we behave properly in an out-of-memory # This test checks that we behave properly in an out-of-memory
......
...@@ -51,7 +51,7 @@ PASS testLineTerminator("\u2028"); is false ...@@ -51,7 +51,7 @@ PASS testLineTerminator("\u2028"); is false
PASS testLineTerminator("\\u2028"); is false PASS testLineTerminator("\\u2028"); is false
PASS testLineTerminator("\u2029"); is false PASS testLineTerminator("\u2029"); is false
PASS testLineTerminator("\\u2029"); is false PASS testLineTerminator("\\u2029"); is false
FAIL RegExp('[/]').source should be [/]. Was [\/]. PASS RegExp('[/]').source is '[/]'
PASS RegExp('\\[/]').source is '\\[\\/]' PASS RegExp('\\[/]').source is '\\[\\/]'
PASS var o = new RegExp(); o.toString() === '/'+o.source+'/' && eval(o.toString()+'.exec(String())') is [""] PASS var o = new RegExp(); o.toString() === '/'+o.source+'/' && eval(o.toString()+'.exec(String())') is [""]
PASS successfullyParsed is true PASS successfullyParsed is true
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment