Commit c54b5e5d authored by lrn@chromium.org's avatar lrn@chromium.org

Selecting between simple text search and Boyer-Moore now happens by trying

the simple search and bailing out and continuing with Boyer-Moore if the simple
version seems too expensive.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@498 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 19fceabf
...@@ -1150,28 +1150,47 @@ static int SingleCharIndexOf(Vector<const schar> string, ...@@ -1150,28 +1150,47 @@ static int SingleCharIndexOf(Vector<const schar> string,
} }
// Trivial string search for shorter strings. // Trivial string search for shorter strings.
// On return, if "complete" is set to true, the return value is the
// final result of searching for the patter in the subject.
// If "complete" is set to false, the return value is the index where
// further checking should start, i.e., it's guaranteed that the pattern
// does not occur at a position prior to the returned index.
template <typename pchar, typename schar> template <typename pchar, typename schar>
static int SimpleIndexOf(Vector<const schar> subject, static int SimpleIndexOf(Vector<const schar> subject,
Vector<const pchar> pattern, Vector<const pchar> pattern,
int start_index) { int start_index,
bool &complete) {
int pattern_length = pattern.length(); int pattern_length = pattern.length();
int subject_length = subject.length(); int subject_length = subject.length();
// Badness is a count of how many extra times the same character
// is checked. We compare it to the index counter, so we start
// it at the start_index, and give it a little discount to avoid
// very early bail-outs.
int badness = start_index - pattern_length;
// We know our pattern is at least 2 characters, we cache the first so // We know our pattern is at least 2 characters, we cache the first so
// the common case of the first character not matching is faster. // the common case of the first character not matching is faster.
pchar pattern_first_char = pattern[0]; pchar pattern_first_char = pattern[0];
for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) { for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) {
if (subject[i] != pattern_first_char) continue; if (subject[i] != pattern_first_char) continue;
int j = 1; int j = 1;
do { do {
if (pattern[j] != subject[j+i]) { if (pattern[j] != subject[i+j]) {
break; break;
} }
j++; j++;
} while (j < pattern_length); } while (j < pattern_length);
if (j == pattern_length) { if (j == pattern_length) {
complete = true;
return i; return i;
} }
badness += j;
if (badness > i) { // More than one extra character on average.
complete = false;
return (i + 1); // No matches up to index i+1.
}
} }
complete = true;
return -1; return -1;
} }
...@@ -1180,16 +1199,13 @@ template <typename schar, typename pchar> ...@@ -1180,16 +1199,13 @@ template <typename schar, typename pchar>
static int StringMatchStrategy(Vector<const schar> sub, static int StringMatchStrategy(Vector<const schar> sub,
Vector<const pchar> pat, Vector<const pchar> pat,
int start_index) { int start_index) {
int pattern_length = pat.length(); ASSERT(pat.length() > 1);
ASSERT(pattern_length > 1);
// For small searches, a complex sort is not worth the setup overhead. // For small searches, a complex sort is not worth the setup overhead.
int subject_length = sub.length() - start_index; bool complete;
if (subject_length < 100 || pattern_length < 8) { int idx = SimpleIndexOf(sub, pat, start_index, complete);
return SimpleIndexOf(sub, pat, start_index); if (complete) return idx;
} return BoyerMooreIndexOf(sub, pat, idx);
return BoyerMooreIndexOf(sub, pat, start_index);
} }
// Perform string match of pattern on subject, starting at start index. // Perform string match of pattern on subject, starting at start index.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment