Commit 301d74ce authored by jgruber, committed by Commit Bot

Revert "[regexp] Limit ATOM regexps to patterns length <= 2"

This reverts commit 10817205.

While increasing the number of IRREGEXP regexp instances (vs. ATOM)
gives us a 3% perf improvement, it also results in higher memory
overhead. This CL is the suspected culprit for the recent 5x increase
in OOM crashes from within regexp codegen.

Bug: v8:6633, chromium:790833
Change-Id: Icca70b31fbda8cfb7a63dc895f6665dfe534359d
Reviewed-on: https://chromium-review.googlesource.com/817294
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#49963}
parent e1de82b4
......@@ -98,12 +98,36 @@ ContainedInLattice AddRange(ContainedInLattice containment,
return containment;
}
// Generic RegExp methods. Dispatches to implementation specific methods.
// Tuning knob for the Boyer-Moore lookahead: more makes code generation
// slower, less makes the V8 benchmark score lower.
// HasFewDifferentCharacters() also uses this value as the cap on how many
// leading pattern characters it inspects.
const int kMaxLookaheadForBoyerMoore = 8;
// Patterns whose length is at most this value are rejected outright by
// HasFewDifferentCharacters().
// In a 3-character pattern you can maximally step forwards 3 characters
// at a time, which is not always enough to pay for the extra logic.
const int kPatternTooShortForBoyerMoore = 2;
// Heuristic that picks out patterns for which the regexp engine is expected
// to beat the dedicated atom-matching code.
//
// Only the first kMaxLookaheadForBoyerMoore characters of |pattern| are
// examined. Characters are bucketed by their low 7 bits, so two characters
// that share those bits count as the same one. Returns true when the examined
// prefix is longer than kPatternTooShortForBoyerMoore and contains at least
// three times as many characters as it has distinct buckets; returns false
// otherwise.
static bool HasFewDifferentCharacters(Handle<String> pattern) {
  const int prefix_length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
  if (prefix_length <= kPatternTooShortForBoyerMoore) return false;

  const int kBuckets = 128;
  // Value-initialised: every bucket starts out as "not seen yet".
  bool seen[kBuckets] = {};
  int distinct = 0;
  for (int pos = 0; pos < prefix_length; ++pos) {
    const int bucket = pattern->Get(pos) & (kBuckets - 1);
    if (seen[bucket]) continue;
    seen[bucket] = true;
    ++distinct;
    // Bail out as soon as the alphabet is too rich: a low-alphabet pattern
    // needs at least 3 characters per distinct character.
    if (distinct * 3 > prefix_length) return false;
  }
  return true;
}
// Generic RegExp methods. Dispatches to implementation specific methods.
MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags) {
......@@ -133,7 +157,7 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
bool has_been_compiled = false;
if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
pattern->length() <= kPatternTooShortForBoyerMoore) {
!HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern.
AtomCompile(re, pattern, flags, pattern);
has_been_compiled = true;
......@@ -141,12 +165,11 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
parse_result.capture_count == 0) {
RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data();
if (!IgnoreCase(atom->flags()) &&
atom_pattern.length() <= kPatternTooShortForBoyerMoore) {
Handle<String> atom_string;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, atom_string,
isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
Handle<String> atom_string;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, atom_string,
isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) {
AtomCompile(re, pattern, flags, atom_string);
has_been_compiled = true;
}
......@@ -2981,8 +3004,6 @@ static void EmitHat(RegExpCompiler* compiler,
on_success->Emit(compiler, &new_trace);
}
// More makes code generation slower, less makes V8 benchmark score lower.
const int kMaxLookaheadForBoyerMoore = 8;
// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment