Commit 6a0e7224 authored by Jakob Gruber, committed by Commit Bot

[regexp] Limit the size of inlined choice nodes

Codegen for unicode property escapes (e.g.: /\p{L}/u) can produce huge
code objects. This effect can be further magnified through inlining,
leading to exponential code growth in the size of the pattern.

This CL is a (fairly hacky) way to avoid exponential growth. We
recognize choice nodes with 'many' choices and disable inlining for
them. In the future we should fix this properly, either by using the
code size budget correctly, or by improving codegen for property
escapes.

Bug: v8:10441
Change-Id: I817f145251ec8b1b9906cc735c9e9bdb004c98ed
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2170229
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67433}
parent 817d5c43
...@@ -135,9 +135,10 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -135,9 +135,10 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
} }
// This could be a Smi kUninitializedValue or Code. // This could be a Smi kUninitializedValue or Code.
Object Code(bool is_latin1) const; V8_EXPORT_PRIVATE Object Code(bool is_latin1) const;
// This could be a Smi kUninitializedValue or ByteArray. // This could be a Smi kUninitializedValue or ByteArray.
Object Bytecode(bool is_latin1) const; V8_EXPORT_PRIVATE Object Bytecode(bool is_latin1) const;
bool ShouldProduceBytecode(); bool ShouldProduceBytecode();
inline bool HasCompiledCode() const; inline bool HasCompiledCode() const;
inline void DiscardCompiledCodeForSerialization(); inline void DiscardCompiledCodeForSerialization();
......
...@@ -439,6 +439,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, ...@@ -439,6 +439,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
AddLoneLeadSurrogates(compiler, result, on_success, &splitter); AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
AddLoneTrailSurrogates(compiler, result, on_success, &splitter); AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
static constexpr int kMaxRangesToInline = 32; // Arbitrary.
if (ranges->length() > kMaxRangesToInline) result->SetDoNotInline();
return result; return result;
} }
} else { } else {
......
...@@ -237,6 +237,15 @@ class RegExpNode : public ZoneObject { ...@@ -237,6 +237,15 @@ class RegExpNode : public ZoneObject {
eats_at_least_ = eats_at_least; eats_at_least_ = eats_at_least;
} }
// TODO(v8:10441): Stop-gap against exponential code size growth. Unicode
// property escapes can generate very large choice nodes, and inlining them
// (i.e. trace recursion) multiplies that cost. To suppress inlining, the node
// is marked as if the maximum count of code copies had already been generated.
// A proper fix would instead rely on the code size budget (flush_budget), or
// generate property escape matches as calls to a C function.
void SetDoNotInline() {
  trace_count_ = kMaxCopiesCodeGenerated;
}
BoyerMooreLookahead* bm_info(bool not_at_start) { BoyerMooreLookahead* bm_info(bool not_at_start) {
return bm_info_[not_at_start ? 1 : 0]; return bm_info_[not_at_start ? 1 : 0];
} }
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#include "src/utils/ostreams.h" #include "src/utils/ostreams.h"
#include "src/zone/zone-list-inl.h" #include "src/zone/zone-list-inl.h"
#include "test/cctest/cctest.h" #include "test/cctest/cctest.h"
#include "test/common/wasm/flag-utils.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
...@@ -2341,6 +2342,31 @@ TEST(PeepholeLabelFixupsComplex) { ...@@ -2341,6 +2342,31 @@ TEST(PeepholeLabelFixupsComplex) {
} }
} }
// Regression test for v8:10441: a pattern chaining three unicode property
// escapes must compile to less than kMaxSize, whether the result is bytecode
// or native code.
TEST(UnicodePropertyEscapeCodeSize) {
  // Run with the regexp tier-up flag set to false for this test.
  i::FlagScope<bool> no_tier_up(&v8::internal::FLAG_regexp_tier_up, false);

  LocalContext env;
  v8::HandleScope scope(CcTest::isolate());

  // The script executes the regexp once before handing it back; presumably
  // this is what triggers compilation so there is something to measure.
  i::Handle<i::JSRegExp> re = Utils::OpenHandle(
      *CompileRun("const r = /\\p{L}\\p{L}\\p{L}/u; r.exec('\\u200b'); r;")
           .As<v8::RegExp>());

  static constexpr int kMaxSize = 150 * KB;
  static constexpr bool kIsNotLatin1 = false;

  Object maybe_code = re->Code(kIsNotLatin1);
  Object maybe_bytecode = re->Bytecode(kIsNotLatin1);

  // Bytecode takes precedence when both slots could be inspected.
  if (maybe_bytecode.IsByteArray()) {
    // On x64, excessive inlining produced >250KB.
    CHECK_LT(ByteArray::cast(maybe_bytecode).Size(), kMaxSize);
    return;
  }

  // Neither bytecode nor code: the regexp was never compiled.
  if (!maybe_code.IsCode()) {
    UNREACHABLE();
  }

  // On x64, excessive inlining produced >360KB.
  CHECK_LT(Code::cast(maybe_code).Size(), kMaxSize);
  CHECK_EQ(Code::cast(maybe_code).kind(), Code::REGEXP);
}
#undef CHECK_PARSE_ERROR #undef CHECK_PARSE_ERROR
#undef CHECK_SIMPLE #undef CHECK_SIMPLE
#undef CHECK_MIN_MAX #undef CHECK_MIN_MAX
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment