Commit 4edfaa1f authored by Jakob Gruber, committed by Commit Bot

Reland "[regexp] Escape newlines when setting [[OriginalSource]]"

This is a reland of 0e22ec73

Original change's description:
> [regexp] Escape newlines when setting [[OriginalSource]]
> 
> This escapes LineTerminator characters in a regexp pattern when
> creating the string that will be stored in the [[OriginalSource]] slot.
> 
> As an example, the source property for all following objects will equal
> "\n" (a '\' character followed by 'n'):
> 
>   /\n/
>   new RegExp("\n")
>   new RegExp("\\n")
> 
> Bug: v8:1982, chromium:855009
> Change-Id: I3b539497a0697e3d51ec969cae49308b0b312a19
> Reviewed-on: https://chromium-review.googlesource.com/c/1384316
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Yang Guo <yangguo@chromium.org>
> Reviewed-by: Mathias Bynens <mathias@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#58387}

Bug: v8:1982, chromium:855009
Change-Id: I1ba22395477ec37e8e8c944000f9beade1e3250b
Reviewed-on: https://chromium-review.googlesource.com/c/1386495
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#58419}
parent 27421011
......@@ -16765,33 +16765,52 @@ Handle<JSRegExp> JSRegExp::Copy(Handle<JSRegExp> regexp) {
return Handle<JSRegExp>::cast(isolate->factory()->CopyJSObject(regexp));
}
namespace {
// Returns the number of additional characters that the escaped form of
// |source| needs beyond its original length: one for each '/', '\n' and '\r'
// that is not skipped as part of a backslash escape, and five for each
// U+2028 / U+2029 (a single code unit becomes the six-character "\uXXXX"
// sequence).
// NOTE(review): this listing carries both the old and the new text of every
// changed line (two signatures, two '\\' tests, two '/' tests); presumably the
// second line of each pair is the current code -- confirm against the real
// file before editing.
template <typename Char>
inline int CountRequiredEscapes(Handle<String> source) {
int CountRequiredEscapes(Handle<String> source) {
DisallowHeapAllocation no_gc;
int escapes = 0;
Vector<const Char> src = source->GetCharVector<Char>(no_gc);
for (int i = 0; i < src.length(); i++) {
if (src[i] == '\\') {
const Char c = src[i];
if (c == '\\') {
// Existing escape: skip the following character so it is not examined.
// NOTE(review): this also skips a raw line terminator that directly follows
// a backslash, so it contributes no escapes here -- confirm that is intended.
i++;
} else if (src[i] == '/') {
} else if (c == '/') {
// A forward slash that is not already escaped needs one extra '\'.
escapes++;
} else if (c == '\n') {
// Line feed is written as the two characters "\n": one extra character.
escapes++;
} else if (c == '\r') {
// Carriage return is written as the two characters "\r": one extra character.
escapes++;
} else if (static_cast<int>(c) == 0x2028) {
// One code unit is replaced by the six characters of "\u2028".
escapes += std::strlen("\\u2028") - 1;
} else if (static_cast<int>(c) == 0x2029) {
// One code unit is replaced by the six characters of "\u2029".
escapes += std::strlen("\\u2029") - 1;
} else {
// Every line terminator is expected to be handled by a branch above.
DCHECK(!unibrow::IsLineTerminator(static_cast<unibrow::uchar>(c)));
}
}
return escapes;
}
// Writes the NUL-terminated |string| into |v| starting at index |*d| and
// advances |*d| by one for every character written. The terminating '\0' is
// not written.
template <typename Char>
void WriteStringToCharVector(Vector<Char> v, int* d, const char* string) {
  for (const char* cursor = string; *cursor != '\0'; ++cursor) {
    v[*d] = *cursor;
    ++(*d);
  }
}
template <typename Char, typename StringType>
inline Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
Handle<StringType> result) {
DisallowHeapAllocation no_gc;
Vector<const Char> src = source->GetCharVector<Char>(no_gc);
Vector<Char> dst(result->GetChars(no_gc), result->length());
int s = 0;
int d = 0;
// TODO(v8:1982): Fully implement
// https://tc39.github.io/ecma262/#sec-escaperegexppattern
while (s < src.length()) {
if (src[s] == '\\') {
// Escape. Copy this and next character.
......@@ -16800,6 +16819,22 @@ inline Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
} else if (src[s] == '/') {
// Not escaped forward-slash needs escape.
dst[d++] = '\\';
} else if (src[s] == '\n') {
WriteStringToCharVector(dst, &d, "\\n");
s++;
continue;
} else if (src[s] == '\r') {
WriteStringToCharVector(dst, &d, "\\r");
s++;
continue;
} else if (static_cast<int>(src[s]) == 0x2028) {
WriteStringToCharVector(dst, &d, "\\u2028");
s++;
continue;
} else if (static_cast<int>(src[s]) == 0x2029) {
WriteStringToCharVector(dst, &d, "\\u2029");
s++;
continue;
}
dst[d++] = src[s++];
}
......@@ -16807,7 +16842,6 @@ inline Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
return result;
}
MaybeHandle<String> EscapeRegExpSource(Isolate* isolate,
Handle<String> source) {
DCHECK(source->IsFlat());
......@@ -16832,6 +16866,7 @@ MaybeHandle<String> EscapeRegExpSource(Isolate* isolate,
}
}
} // namespace
// static
MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
......
......@@ -808,3 +808,19 @@ assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432"));
assertFalse(/\uDB88|\uDBEC|aa/.test(""));
assertFalse(/\uDB88|\uDBEC|aa/u.test(""));
// EscapeRegExpPattern: a line terminator in a pattern must appear in `source`
// as an escape sequence, whether the pattern comes from a regexp literal or
// from a string passed to the RegExp constructor.

// Line feed: the expected source is always backslash + 'n'.
assertEquals("\\n", /\n/.source);
assertEquals("\\n", new RegExp("\n").source);
assertEquals("\\n", new RegExp("\\n").source);
// An escaped backslash followed by 'n' is expected to be left unchanged.
assertEquals("\\\\n", /\\n/.source);
// Carriage return: same expectations as for line feed.
assertEquals("\\r", /\r/.source);
assertEquals("\\r", new RegExp("\r").source);
assertEquals("\\r", new RegExp("\\r").source);
assertEquals("\\\\r", /\\r/.source);
// U+2028: the expected source is the six-character \u2028 escape.
assertEquals("\\u2028", /\u2028/.source);
assertEquals("\\u2028", new RegExp("\u2028").source);
assertEquals("\\u2028", new RegExp("\\u2028").source);
// U+2029: the expected source is the six-character \u2029 escape.
assertEquals("\\u2029", /\u2029/.source);
assertEquals("\\u2029", new RegExp("\u2029").source);
assertEquals("\\u2029", new RegExp("\\u2029").source);
......@@ -479,9 +479,6 @@
'built-ins/TypedArrayConstructors/internals/Set/key-is-out-of-bounds': [FAIL],
'built-ins/TypedArrayConstructors/internals/Set/BigInt/key-is-out-of-bounds': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=5329
'built-ins/RegExp/prototype/source/value-line-terminator': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=5112
'annexB/language/eval-code/direct/func-block-decl-eval-func-no-skip-try': [FAIL],
'annexB/language/eval-code/direct/func-if-decl-else-decl-a-eval-func-no-skip-try': [FAIL],
......
......@@ -43,13 +43,13 @@ PASS testForwardSlash("x/x/x", "x\/x\/x"); is true
PASS testForwardSlash("x\/x/x", "x\/x\/x"); is true
PASS testForwardSlash("x/x\/x", "x\/x\/x"); is true
PASS testForwardSlash("x\/x\/x", "x\/x\/x"); is true
FAIL testLineTerminator("\n"); should be false. Was true.
PASS testLineTerminator("\n"); is false
PASS testLineTerminator("\\n"); is false
FAIL testLineTerminator("\r"); should be false. Was true.
PASS testLineTerminator("\r"); is false
PASS testLineTerminator("\\r"); is false
FAIL testLineTerminator("\u2028"); should be false. Was true.
PASS testLineTerminator("\u2028"); is false
PASS testLineTerminator("\\u2028"); is false
FAIL testLineTerminator("\u2029"); should be false. Was true.
PASS testLineTerminator("\u2029"); is false
PASS testLineTerminator("\\u2029"); is false
FAIL RegExp('[/]').source should be [/]. Was [\/].
PASS RegExp('\\[/]').source is '\\[\\/]'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment