Commit 965fea65 authored by erik.corry@gmail.com's avatar erik.corry@gmail.com

Switch regexp strategy for regexps that are just plain

strings with a small alphabet.  We already have code
that handles these regexps well, we were just not always
activating it.
Review URL: https://chromiumcodereview.appspot.com/9959096

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11218 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent d37104f6
......@@ -108,6 +108,36 @@ static inline void ThrowRegExpException(Handle<JSRegExp> re,
}
// More makes code generation slower, less makes V8 benchmark score lower.
const int kMaxLookaheadForBoyerMoore = 8;
// In a 3-character pattern you can maximally step forwards 3 characters
// at a time, which is not always enough to pay for the extra logic.
const int kPatternTooShortForBoyerMoore = 2;
// Identifies the sort of regexps where the regexp engine is faster
// than the code used for atom matches.
static bool HasFewDifferentCharacters(Handle<String> pattern) {
int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
if (length <= kPatternTooShortForBoyerMoore) return false;
const int kMod = 128;
bool character_found[kMod];
int different = 0;
memset(&character_found[0], 0, sizeof(character_found));
for (int i = 0; i < length; i++) {
int ch = (pattern->Get(i) & (kMod - 1));
if (!character_found[ch]) {
character_found[ch] = true;
different++;
// We declare a regexp low-alphabet if it has at least 3 times as many
// characters as it has different characters.
if (different * 3 > length) return false;
}
}
return true;
}
// Generic RegExp methods. Dispatches to implementation specific methods.
......@@ -141,9 +171,14 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
return Handle<Object>::null();
}
if (parse_result.simple && !flags.is_ignore_case()) {
bool has_been_compiled = false;
if (parse_result.simple &&
!flags.is_ignore_case() &&
!HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern.
AtomCompile(re, pattern, flags, pattern);
has_been_compiled = true;
} else if (parse_result.tree->IsAtom() &&
!flags.is_ignore_case() &&
parse_result.capture_count == 0) {
......@@ -151,8 +186,12 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Vector<const uc16> atom_pattern = atom->data();
Handle<String> atom_string =
isolate->factory()->NewStringFromTwoByte(atom_pattern);
AtomCompile(re, pattern, flags, atom_string);
} else {
if (!HasFewDifferentCharacters(atom_string)) {
AtomCompile(re, pattern, flags, atom_string);
has_been_compiled = true;
}
}
if (!has_been_compiled) {
IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
}
ASSERT(re->data()->IsFixedArray());
......@@ -3429,6 +3468,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
return true;
}
/* Code generation for choice nodes.
*
* We generate quick checks that do a mask and compare to eliminate a
......@@ -3507,7 +3547,6 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
* \______________/
*/
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
int choice_count = alternatives_->length();
......@@ -3578,9 +3617,6 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
bool skip_was_emitted = false;
// More makes code generation slower, less makes V8 benchmark score lower.
const int kMaxLookaheadForBoyerMoore = 8;
if (!greedy_loop && choice_count == 2) {
GuardedAlternative alt1 = alternatives_->at(1);
if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment