Commit d287f225 authored by erik.corry, committed by Commit bot

Limit code size generated for very large regexps

R=jkummerow@chromium.org, yangguo@chromium.org
BUG=

Review URL: https://codereview.chromium.org/799403003

Cr-Commit-Position: refs/heads/master@{#25839}
parent 978f41a1
......@@ -1004,6 +1004,8 @@ class RegExpCompiler {
// Returns the ignore_case_ setting this compiler was constructed with.
inline bool ignore_case() { return ignore_case_; }
// Returns the one_byte_ setting this compiler was constructed with.
inline bool one_byte() { return one_byte_; }
// Returns whether optimizations are enabled for this compilation. Consulted
// instead of reading FLAG_regexp_optimization directly, so the setting can be
// changed per compilation.
inline bool optimize() { return optimize_; }
// Overrides the optimization setting, which the constructor initializes from
// FLAG_regexp_optimization. RegExpEngine::Compile calls this with
// !TooMuchRegExpCode(pattern).
inline void set_optimize(bool value) { optimize_ = value; }
// Returns a pointer to the FrequencyCollator owned by this compiler.
FrequencyCollator* frequency_collator() { return &frequency_collator_; }
// Returns the current value of current_expansion_factor_.
int current_expansion_factor() { return current_expansion_factor_; }
......@@ -1024,6 +1026,7 @@ class RegExpCompiler {
// Copied from the constructor's ignore_case argument.
bool ignore_case_;
// Copied from the constructor's one_byte argument.
bool one_byte_;
// Initialized to false by the constructor.
bool reg_exp_too_big_;
// Per-compilation optimization switch. Starts out as FLAG_regexp_optimization
// and can be changed afterwards through set_optimize().
bool optimize_;
// Initialized to 1 by the constructor.
int current_expansion_factor_;
// Default-constructed; exposed by pointer through frequency_collator().
FrequencyCollator frequency_collator_;
// Copied from the constructor's zone argument.
Zone* zone_;
......@@ -1056,6 +1059,7 @@ RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case,
ignore_case_(ignore_case),
one_byte_(one_byte),
reg_exp_too_big_(false),
optimize_(FLAG_regexp_optimization),
current_expansion_factor_(1),
frequency_collator_(),
zone_(zone) {
......@@ -1071,16 +1075,6 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble(
Handle<String> pattern) {
Heap* heap = pattern->GetHeap();
bool use_slow_safe_regexp_compiler = false;
if (heap->total_regexp_code_generated() >
RegExpImpl::kRegWxpCompiledLimit &&
heap->isolate()->memory_allocator()->SizeExecutable() >
RegExpImpl::kRegExpExecutableMemoryLimit) {
use_slow_safe_regexp_compiler = true;
}
macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
#ifdef DEBUG
if (FLAG_trace_regexp_assembler)
macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
......@@ -2236,8 +2230,7 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
// We are being asked to make a non-generic version. Keep track of how many
// non-generic versions we generate so as not to overdo it.
trace_count_++;
if (FLAG_regexp_optimization &&
trace_count_ < kMaxCopiesCodeGenerated &&
if (compiler->optimize() && trace_count_ < kMaxCopiesCodeGenerated &&
compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
return CONTINUE;
}
......@@ -4116,15 +4109,12 @@ void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
}
alt_gen->expects_preload = preload->preload_is_current_;
bool generate_full_check_inline = false;
if (FLAG_regexp_optimization &&
if (compiler->optimize() &&
try_to_emit_quick_check_for_alternative(i == 0) &&
alternative.node()->EmitQuickCheck(compiler,
trace,
&new_trace,
preload->preload_has_checked_bounds_,
&alt_gen->possible_success,
&alt_gen->quick_check_details,
fall_through_on_failure)) {
alternative.node()->EmitQuickCheck(
compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
&alt_gen->possible_success, &alt_gen->quick_check_details,
fall_through_on_failure)) {
// Quick check was generated for this choice.
preload->preload_is_current_ = true;
preload->preload_has_checked_bounds_ = true;
......@@ -4922,7 +4912,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
if (body_can_be_empty) {
body_start_reg = compiler->AllocateRegister();
} else if (FLAG_regexp_optimization && !needs_capture_clearing) {
} else if (compiler->optimize() && !needs_capture_clearing) {
// Only unroll if there are no captures and the body can't be
// empty.
{
......@@ -6020,6 +6010,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
}
RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
compiler.set_optimize(!TooMuchRegExpCode(pattern));
// Sample some characters from the middle of the string.
static const int kSampleSize = 128;
......@@ -6122,6 +6114,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
RegExpMacroAssemblerIrregexp macro_assembler(codes, zone);
#endif // V8_INTERPRETED_REGEXP
macro_assembler.set_slow_safe(TooMuchRegExpCode(pattern));
// Inserted here, instead of in Assembler, because it depends on information
// in the AST that isn't replicated in the Node structure.
static const int kMaxBacksearchLimit = 1024;
......@@ -6145,4 +6139,14 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
}
// Reports whether code generation for |pattern| should be constrained.
//
// Returns true when either
//   * the pattern is longer than RegExpImpl::kRegExpTooLargeToOptimize, or
//   * the heap's total generated regexp code exceeds
//     RegExpImpl::kRegExpCompiledLimit and, at the same time, the executable
//     memory in use exceeds RegExpImpl::kRegExpExecutableMemoryLimit.
// Returns false otherwise.
bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) {
  Heap* heap = pattern->GetHeap();

  const bool pattern_too_large =
      pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;

  // Both limits must be exceeded for the code budget to count as exhausted;
  // the executable-memory size is only queried when the first limit is hit.
  const bool code_budget_exhausted =
      heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit &&
      heap->isolate()->memory_allocator()->SizeExecutable() >
          RegExpImpl::kRegExpExecutableMemoryLimit;

  return pattern_too_large || code_budget_exhausted;
}
}} // namespace v8::internal
......@@ -212,7 +212,8 @@ class RegExpImpl {
// total regexp code compiled including code that has subsequently been freed
// and the total executable memory at any point.
static const int kRegExpExecutableMemoryLimit = 16 * MB;
static const int kRegWxpCompiledLimit = 1 * MB;
static const int kRegExpCompiledLimit = 1 * MB;
static const int kRegExpTooLargeToOptimize = 10 * KB;
private:
static bool CompileIrregexp(Handle<JSRegExp> re,
......@@ -1665,6 +1666,8 @@ class RegExpEngine: public AllStatic {
Handle<String> sample_subject,
bool is_one_byte, Zone* zone);
static bool TooMuchRegExpCode(Handle<String> pattern);
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};
......
......@@ -1694,6 +1694,64 @@ TEST(TestInternalWeakListsTraverseWithGC) {
}
// Checks that a regexp whose source is longer than
// RegExpImpl::kRegExpTooLargeToOptimize produces a bounded amount of code,
// while a regexp of half that source size, which is below the threshold,
// produces more than twice as much code.
TEST(TestSizeOfRegExpCode) {
  // The size comparison is only meaningful when regexp optimization is on.
  if (!FLAG_regexp_optimization) return;

  v8::V8::Initialize();

  Isolate* isolate = CcTest::i_isolate();
  HandleScope scope(isolate);

  LocalContext context;

  // Adjust source below and this check to match
  // RegExpImpl::kRegExpTooLargeToOptimize.
  // CHECK rather than DCHECK: the guard must also fire in release builds,
  // otherwise a changed threshold would go unnoticed there.
  CHECK_EQ(i::RegExpImpl::kRegExpTooLargeToOptimize, 10 * KB);

  // Compile a regexp that is much larger if we are using regexp optimizations.
  // The 30-character seed is doubled until it is at least 10 * 1024 characters
  // long, giving reg_exp_source 15360 characters (above the threshold) and
  // half_size_reg_exp 7680 characters (below it).
  CompileRun(
      "var reg_exp_source = '(?:a|bc|def|ghij|klmno|pqrstu)';"
      "var half_size_reg_exp;"
      "while (reg_exp_source.length < 10 * 1024) {"
      "  half_size_reg_exp = reg_exp_source;"
      "  reg_exp_source = reg_exp_source + reg_exp_source;"
      "}"
      // Flatten string.
      "reg_exp_source.match(/f/);");

  // Get initial heap size after several full GCs, which will stabilize
  // the heap size and return with sweeping finished completely.
  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
  MarkCompactCollector* collector = CcTest::heap()->mark_compact_collector();
  if (collector->sweeping_in_progress()) {
    collector->EnsureSweepingCompleted();
  }
  int initial_size = static_cast<int>(CcTest::heap()->SizeOfObjects());

  // First the large regexp, whose source is over the threshold.
  CompileRun("'foo'.match(reg_exp_source);");
  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
  int size_with_regexp = static_cast<int>(CcTest::heap()->SizeOfObjects());

  // Then the half-size regexp, whose source is under the threshold.
  CompileRun("'foo'.match(half_size_reg_exp);");
  CcTest::heap()->CollectAllGarbage(Heap::kNoGCFlags);
  int size_with_optimized_regexp =
      static_cast<int>(CcTest::heap()->SizeOfObjects());

  int size_of_regexp_code = size_with_regexp - initial_size;

  CHECK_LE(size_of_regexp_code, 500 * KB);

  // Small regexp is half the size, but compiles to more than twice the code
  // due to the optimization steps.
  CHECK_GE(size_with_optimized_regexp,
           size_with_regexp + size_of_regexp_code * 2);
}
TEST(TestSizeOfObjects) {
v8::V8::Initialize();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment