Commit 390c7fed authored by Jakob Gruber, committed by Commit Bot

Revert "[regexp] Correctly escape a backslash-newline sequence"

This reverts commit 7d1f95d6.

Reason for revert: Speculative revert for https://crbug.com/1046678

Original change's description:
> [regexp] Correctly escape a backslash-newline sequence
> 
> When printing the source string, a backslash-newline sequence ('\\\n',
> '\\\r', '\\\u2028', '\\\u2029') should be formatted as '\n', '\r',
> '\u2028', '\u2029', respectively. Prior to this CL it was formatted as
> a backslash followed by the literal newline character.
> 
> Bug: v8:8615
> Change-Id: Iac90195c56ea1707ea8469066b0cc967ea87fc73
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2016583
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Georg Neis <neis@chromium.org>
> Auto-Submit: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#65986}

TBR=neis@chromium.org,jgruber@chromium.org

# Not skipping CQ checks because original CL landed > 1 day ago.

Bug: v8:8615,chromium:1046678
Change-Id: If28626a1c6868ed848310c0d30cf61a73326f2c1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2027452
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66022}
parent e395871f
...@@ -6213,60 +6213,37 @@ void JSRegExp::MarkTierUpForNextExec() { ...@@ -6213,60 +6213,37 @@ void JSRegExp::MarkTierUpForNextExec() {
namespace { namespace {
bool IsLineTerminator(int c) {
// Expected to return true for '\n', '\r', 0x2028, and 0x2029.
return unibrow::IsLineTerminator(static_cast<unibrow::uchar>(c));
}
// TODO(jgruber): Consider merging CountAdditionalEscapeChars and
// WriteEscapedRegExpSource into a single function to deduplicate dispatch logic
// and move related code closer to each other.
template <typename Char> template <typename Char>
int CountAdditionalEscapeChars(Handle<String> source, bool* needs_escapes_out) { int CountRequiredEscapes(Handle<String> source) {
DisallowHeapAllocation no_gc; DisallowHeapAllocation no_gc;
int escapes = 0; int escapes = 0;
bool needs_escapes = false;
bool in_char_class = false; bool in_char_class = false;
Vector<const Char> src = source->GetCharVector<Char>(no_gc); Vector<const Char> src = source->GetCharVector<Char>(no_gc);
for (int i = 0; i < src.length(); i++) { for (int i = 0; i < src.length(); i++) {
const Char c = src[i]; const Char c = src[i];
if (c == '\\') { if (c == '\\') {
if (i + 1 < src.length() && IsLineTerminator(src[i + 1])) { // Escape. Skip next character;
// This '\' is ignored since the next character itself will be escaped.
escapes--;
} else {
// Escape. Skip next character, which will be copied verbatim;
needs_escapes = true;
i++; i++;
}
} else if (c == '/' && !in_char_class) { } else if (c == '/' && !in_char_class) {
// Not escaped forward-slash needs escape. // Not escaped forward-slash needs escape.
needs_escapes = true;
escapes++; escapes++;
} else if (c == '[') { } else if (c == '[') {
in_char_class = true; in_char_class = true;
} else if (c == ']') { } else if (c == ']') {
in_char_class = false; in_char_class = false;
} else if (c == '\n') { } else if (c == '\n') {
needs_escapes = true;
escapes++; escapes++;
} else if (c == '\r') { } else if (c == '\r') {
needs_escapes = true;
escapes++; escapes++;
} else if (static_cast<int>(c) == 0x2028) { } else if (static_cast<int>(c) == 0x2028) {
needs_escapes = true;
escapes += std::strlen("\\u2028") - 1; escapes += std::strlen("\\u2028") - 1;
} else if (static_cast<int>(c) == 0x2029) { } else if (static_cast<int>(c) == 0x2029) {
needs_escapes = true;
escapes += std::strlen("\\u2029") - 1; escapes += std::strlen("\\u2029") - 1;
} else { } else {
DCHECK(!IsLineTerminator(c)); DCHECK(!unibrow::IsLineTerminator(static_cast<unibrow::uchar>(c)));
} }
} }
DCHECK(!in_char_class); DCHECK(!in_char_class);
DCHECK_GE(escapes, 0);
DCHECK_IMPLIES(escapes != 0, needs_escapes);
*needs_escapes_out = needs_escapes;
return escapes; return escapes;
} }
...@@ -6286,42 +6263,33 @@ Handle<StringType> WriteEscapedRegExpSource(Handle<String> source, ...@@ -6286,42 +6263,33 @@ Handle<StringType> WriteEscapedRegExpSource(Handle<String> source,
int d = 0; int d = 0;
bool in_char_class = false; bool in_char_class = false;
while (s < src.length()) { while (s < src.length()) {
const Char c = src[s]; if (src[s] == '\\') {
if (c == '\\') {
if (s + 1 < src.length() && IsLineTerminator(src[s + 1])) {
// This '\' is ignored since the next character itself will be escaped.
s++;
continue;
} else {
// Escape. Copy this and next character. // Escape. Copy this and next character.
dst[d++] = src[s++]; dst[d++] = src[s++];
}
if (s == src.length()) break; if (s == src.length()) break;
} else if (c == '/' && !in_char_class) { } else if (src[s] == '/' && !in_char_class) {
// Not escaped forward-slash needs escape. // Not escaped forward-slash needs escape.
dst[d++] = '\\'; dst[d++] = '\\';
} else if (c == '[') { } else if (src[s] == '[') {
in_char_class = true; in_char_class = true;
} else if (c == ']') { } else if (src[s] == ']') {
in_char_class = false; in_char_class = false;
} else if (c == '\n') { } else if (src[s] == '\n') {
WriteStringToCharVector(dst, &d, "\\n"); WriteStringToCharVector(dst, &d, "\\n");
s++; s++;
continue; continue;
} else if (c == '\r') { } else if (src[s] == '\r') {
WriteStringToCharVector(dst, &d, "\\r"); WriteStringToCharVector(dst, &d, "\\r");
s++; s++;
continue; continue;
} else if (static_cast<int>(c) == 0x2028) { } else if (static_cast<int>(src[s]) == 0x2028) {
WriteStringToCharVector(dst, &d, "\\u2028"); WriteStringToCharVector(dst, &d, "\\u2028");
s++; s++;
continue; continue;
} else if (static_cast<int>(c) == 0x2029) { } else if (static_cast<int>(src[s]) == 0x2029) {
WriteStringToCharVector(dst, &d, "\\u2029"); WriteStringToCharVector(dst, &d, "\\u2029");
s++; s++;
continue; continue;
} else {
DCHECK(!IsLineTerminator(c));
} }
dst[d++] = src[s++]; dst[d++] = src[s++];
} }
...@@ -6335,12 +6303,10 @@ MaybeHandle<String> EscapeRegExpSource(Isolate* isolate, ...@@ -6335,12 +6303,10 @@ MaybeHandle<String> EscapeRegExpSource(Isolate* isolate,
DCHECK(source->IsFlat()); DCHECK(source->IsFlat());
if (source->length() == 0) return isolate->factory()->query_colon_string(); if (source->length() == 0) return isolate->factory()->query_colon_string();
bool one_byte = String::IsOneByteRepresentationUnderneath(*source); bool one_byte = String::IsOneByteRepresentationUnderneath(*source);
bool needs_escapes = false; int escapes = one_byte ? CountRequiredEscapes<uint8_t>(source)
int additional_escape_chars = : CountRequiredEscapes<uc16>(source);
one_byte ? CountAdditionalEscapeChars<uint8_t>(source, &needs_escapes) if (escapes == 0) return source;
: CountAdditionalEscapeChars<uc16>(source, &needs_escapes); int length = source->length() + escapes;
if (!needs_escapes) return source;
int length = source->length() + additional_escape_chars;
if (one_byte) { if (one_byte) {
Handle<SeqOneByteString> result; Handle<SeqOneByteString> result;
ASSIGN_RETURN_ON_EXCEPTION(isolate, result, ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
......
...@@ -835,7 +835,3 @@ assertEquals("[/]]", /[/]]/.source); ...@@ -835,7 +835,3 @@ assertEquals("[/]]", /[/]]/.source);
assertEquals("[[/]]", /[[/]]/.source); assertEquals("[[/]]", /[[/]]/.source);
assertEquals("[[\\/]", /[[\/]/.source); assertEquals("[[\\/]", /[[\/]/.source);
assertEquals("[[\\/]]", /[[\/]]/.source); assertEquals("[[\\/]]", /[[\/]]/.source);
assertEquals("\\n", new RegExp("\\\n").source);
assertEquals("\\r", new RegExp("\\\r").source);
assertEquals("\\u2028", new RegExp("\\\u2028").source);
assertEquals("\\u2029", new RegExp("\\\u2029").source);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment