Commit 17b9d872 authored by Patrick Thier's avatar Patrick Thier Committed by Commit Bot

[regexp] Add missing case for EscapeRegExpPattern

EscapeRegExpPattern should return a string representation of a
RegExp instance that in turn can be used to construct a new
RegExp instance with the same internal state as the original one.

Previous versions incorrectly escaped '/' also inside character classes
(e.g. /[/]/ returned "[\/]").

This patch properly escapes '/' when necessary and omits unnecessary
escapes.

Bug: v8:8615, v8:1982, v8:9446
Change-Id: I4ecb993dc69d6976f4637cedf43465cd0c32e427
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1688050
Commit-Queue: Patrick Thier <pthier@google.com>
Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarMathias Bynens <mathias@chromium.org>
Reviewed-by: 's avatarPeter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62587}
parent a6eabacf
......@@ -6140,15 +6140,20 @@ template <typename Char>
int CountRequiredEscapes(Handle<String> source) {
DisallowHeapAllocation no_gc;
int escapes = 0;
bool in_char_class = false;
Vector<const Char> src = source->GetCharVector<Char>(no_gc);
for (int i = 0; i < src.length(); i++) {
const Char c = src[i];
if (c == '\\') {
// Escape. Skip next character;
i++;
} else if (c == '/') {
} else if (c == '/' && !in_char_class) {
// Not escaped forward-slash needs escape.
escapes++;
} else if (c == '[') {
in_char_class = true;
} else if (c == ']') {
in_char_class = false;
} else if (c == '\n') {
escapes++;
} else if (c == '\r') {
......@@ -6161,6 +6166,7 @@ int CountRequiredEscapes(Handle<String> source) {
DCHECK(!unibrow::IsLineTerminator(static_cast<unibrow::uchar>(c)));
}
}
DCHECK(!in_char_class);
return escapes;
}
......@@ -6178,16 +6184,19 @@ Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
Vector<Char> dst(result->GetChars(no_gc), result->length());
int s = 0;
int d = 0;
// TODO(v8:1982): Fully implement
// https://tc39.github.io/ecma262/#sec-escaperegexppattern
bool in_char_class = false;
while (s < src.length()) {
if (src[s] == '\\') {
// Escape. Copy this and next character.
dst[d++] = src[s++];
if (s == src.length()) break;
} else if (src[s] == '/') {
} else if (src[s] == '/' && !in_char_class) {
// Not escaped forward-slash needs escape.
dst[d++] = '\\';
} else if (src[s] == '[') {
in_char_class = true;
} else if (src[s] == ']') {
in_char_class = false;
} else if (src[s] == '\n') {
WriteStringToCharVector(dst, &d, "\\n");
s++;
......@@ -6208,6 +6217,7 @@ Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
dst[d++] = src[s++];
}
DCHECK_EQ(result->length(), d);
DCHECK(!in_char_class);
return result;
}
......@@ -6264,13 +6274,13 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
source = String::Flatten(isolate, source);
RETURN_ON_EXCEPTION(isolate, RegExp::Compile(isolate, regexp, source, flags),
JSRegExp);
Handle<String> escaped_source;
ASSIGN_RETURN_ON_EXCEPTION(isolate, escaped_source,
EscapeRegExpSource(isolate, source), JSRegExp);
RETURN_ON_EXCEPTION(isolate, RegExp::Compile(isolate, regexp, source, flags),
JSRegExp);
regexp->set_source(*escaped_source);
regexp->set_flags(Smi::FromInt(flags));
......
......@@ -824,3 +824,14 @@ assertEquals("\\u2028", new RegExp("\\u2028").source);
assertEquals("\\u2029", /\u2029/.source);
assertEquals("\\u2029", new RegExp("\u2029").source);
assertEquals("\\u2029", new RegExp("\\u2029").source);
assertEquals("[/]", /[/]/.source);
assertEquals("[\\/]", /[\/]/.source);
assertEquals("[\\\\/]", /[\\/]/.source);
assertEquals("[/]", new RegExp("[/]").source);
assertEquals("[/]", new RegExp("[\/]").source);
assertEquals("[\\/]", new RegExp("[\\/]").source);
assertEquals("[[/]", /[[/]/.source);
assertEquals("[/]]", /[/]]/.source);
assertEquals("[[/]]", /[[/]]/.source);
assertEquals("[[\\/]", /[[\/]/.source);
assertEquals("[[\\/]]", /[[\/]]/.source);
......@@ -126,6 +126,10 @@
# no_i18n build has an old ICU data and Georgian is treated as unicameral.
'ecma/String/15.5.4.12-3': [FAIL, ['no_i18n == True', PASS]],
# Expecations in this test don't match the spec.
# See also https://bugs.chromium.org/p/v8/issues/detail?id=9446
'js1_5/Regress/regress-248444': [FAIL],
##################### SKIPPED TESTS #####################
# This test checks that we behave properly in an out-of-memory
......
......@@ -51,7 +51,7 @@ PASS testLineTerminator("\u2028"); is false
PASS testLineTerminator("\\u2028"); is false
PASS testLineTerminator("\u2029"); is false
PASS testLineTerminator("\\u2029"); is false
FAIL RegExp('[/]').source should be [/]. Was [\/].
PASS RegExp('[/]').source is '[/]'
PASS RegExp('\\[/]').source is '\\[\\/]'
PASS var o = new RegExp(); o.toString() === '/'+o.source+'/' && eval(o.toString()+'.exec(String())') is [""]
PASS successfullyParsed is true
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment