Commit 5a8a7ed8 authored by Jakob Gruber, committed by Commit Bot

Revert "[regexp] Limit ATOM regexps to single-character patterns"

This reverts commit 062bb7d4.

Reason for revert: <INSERT REASONING HERE>

Original change's description:
> [regexp] Limit ATOM regexps to single-character patterns
> 
> There's an inherent trade-off when deciding between ATOM and IRREGEXP
> regexps: IRREGEXP is faster at runtime for all but trivial single-character
> patterns, while ATOM regexps have a lower memory overhead.
> 
> This CL is intended to help investigate impact on benchmarks and real-world
> code - if something tanks, it's easy to revert, otherwise it can be a first
> step towards a possible removal of ATOM regexps.
> 
> Bug: v8:6633
> Change-Id: Ia41d8eb28d33952735562d3d4127202746a6ac4e
> Reviewed-on: https://chromium-review.googlesource.com/589435
> Reviewed-by: Yang Guo <yangguo@chromium.org>
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#47081}

TBR=yangguo@chromium.org,jgruber@chromium.org

Change-Id: I8655bc4055af5d593f507e16918b434ff45f5379
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:6633
Reviewed-on: https://chromium-review.googlesource.com/599547
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47106}
parent 426ae426
...@@ -96,6 +96,37 @@ ContainedInLattice AddRange(ContainedInLattice containment, ...@@ -96,6 +96,37 @@ ContainedInLattice AddRange(ContainedInLattice containment,
return containment; return containment;
} }
// Caps how many leading pattern characters HasFewDifferentCharacters inspects.
// More makes code generation slower, less makes V8 benchmark score lower.
const int kMaxLookaheadForBoyerMoore = 8;
// Patterns whose (capped) length is at most this value are rejected up front
// by HasFewDifferentCharacters as too short to be worth the extra logic.
// In a 3-character pattern you can maximally step forwards 3 characters
// at a time, which is not always enough to pay for the extra logic.
const int kPatternTooShortForBoyerMoore = 2;
// Heuristic that picks out "low-alphabet" patterns, i.e. the sort of regexps
// where the regexp engine is faster than the code used for atom matches.
//
// Only the first kMaxLookaheadForBoyerMoore characters of |pattern| are
// examined. Characters are bucketed by their low 7 bits, so two characters
// that agree in those bits are counted as the same character.
//
// Returns true iff the examined prefix is longer than
// kPatternTooShortForBoyerMoore and contains at least 3 times as many
// characters as it has different (bucketed) characters.
static bool HasFewDifferentCharacters(Handle<String> pattern) {
  const int prefix_length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
  if (prefix_length <= kPatternTooShortForBoyerMoore) return false;

  const int kMod = 128;
  // One flag per bucket; all start out cleared.
  bool seen[kMod] = {false};
  int distinct_count = 0;

  for (int index = 0; index < prefix_length; ++index) {
    const int bucket = pattern->Get(index) & (kMod - 1);
    if (seen[bucket]) continue;

    seen[bucket] = true;
    ++distinct_count;
    // Bail out as soon as the alphabet is too large for the prefix length:
    // low-alphabet requires distinct_count * 3 <= prefix_length.
    if (distinct_count * 3 > prefix_length) return false;
  }

  return true;
}
// Generic RegExp methods. Dispatches to implementation specific methods. // Generic RegExp methods. Dispatches to implementation specific methods.
...@@ -127,7 +158,7 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, ...@@ -127,7 +158,7 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
bool has_been_compiled = false; bool has_been_compiled = false;
if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) && if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) &&
!(flags & JSRegExp::kSticky) && pattern->length() == 1) { !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern. // Parse-tree is a single atom that is equal to the pattern.
AtomCompile(re, pattern, flags, pattern); AtomCompile(re, pattern, flags, pattern);
has_been_compiled = true; has_been_compiled = true;
...@@ -135,11 +166,12 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, ...@@ -135,11 +166,12 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
!(flags & JSRegExp::kSticky) && parse_result.capture_count == 0) { !(flags & JSRegExp::kSticky) && parse_result.capture_count == 0) {
RegExpAtom* atom = parse_result.tree->AsAtom(); RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data(); Vector<const uc16> atom_pattern = atom->data();
if (atom_pattern.length() == 1) { Handle<String> atom_string;
Handle<String> atom_string; ASSIGN_RETURN_ON_EXCEPTION(
ASSIGN_RETURN_ON_EXCEPTION( isolate, atom_string,
isolate, atom_string, isolate->factory()->NewStringFromTwoByte(atom_pattern),
isolate->factory()->NewStringFromTwoByte(atom_pattern), Object); Object);
if (!HasFewDifferentCharacters(atom_string)) {
AtomCompile(re, pattern, flags, atom_string); AtomCompile(re, pattern, flags, atom_string);
has_been_compiled = true; has_been_compiled = true;
} }
...@@ -2997,8 +3029,6 @@ static void EmitHat(RegExpCompiler* compiler, ...@@ -2997,8 +3029,6 @@ static void EmitHat(RegExpCompiler* compiler,
on_success->Emit(compiler, &new_trace); on_success->Emit(compiler, &new_trace);
} }
// More makes code generation slower, less makes V8 benchmark score lower.
const int kMaxLookaheadForBoyerMoore = 8;
// Emit the code to handle \b and \B (word-boundary or non-word-boundary). // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment