Commit 2c85faf1 authored by lrn@chromium.org's avatar lrn@chromium.org

Refactored string search code.

Made string search state explicit for repeated calls (a StringSearch class).

Review URL: http://codereview.chromium.org/3467010

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5550 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent d9d49052
......@@ -100,6 +100,7 @@ SOURCES = {
serialize.cc
snapshot-common.cc
spaces.cc
string-search.cc
string-stream.cc
stub-cache.cc
token.cc
......
......@@ -2624,15 +2624,15 @@ int Runtime::StringMatch(Handle<String> sub,
if (seq_pat->IsAsciiRepresentation()) {
Vector<const char> pat_vector = seq_pat->ToAsciiVector();
if (seq_sub->IsAsciiRepresentation()) {
return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index);
return SearchString(seq_sub->ToAsciiVector(), pat_vector, start_index);
}
return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index);
return SearchString(seq_sub->ToUC16Vector(), pat_vector, start_index);
}
Vector<const uc16> pat_vector = seq_pat->ToUC16Vector();
if (seq_sub->IsAsciiRepresentation()) {
return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index);
return SearchString(seq_sub->ToAsciiVector(), pat_vector, start_index);
}
return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index);
return SearchString(seq_sub->ToUC16Vector(), pat_vector, start_index);
}
......@@ -2889,67 +2889,39 @@ static void SetLastMatchInfoNoCaptures(Handle<String> subject,
}
template <typename schar, typename pchar>
static bool SearchStringMultiple(Vector<schar> subject,
String* pattern,
Vector<pchar> pattern_string,
template <typename SubjectChar, typename PatternChar>
static bool SearchStringMultiple(Vector<const SubjectChar> subject,
Vector<const PatternChar> pattern,
String* pattern_string,
FixedArrayBuilder* builder,
int* match_pos) {
int pos = *match_pos;
int subject_length = subject.length();
int pattern_length = pattern_string.length();
int pattern_length = pattern.length();
int max_search_start = subject_length - pattern_length;
bool is_ascii = (sizeof(schar) == 1);
StringSearchStrategy strategy =
InitializeStringSearch(pattern_string, is_ascii);
switch (strategy) {
case SEARCH_FAIL: break;
case SEARCH_SHORT:
while (pos <= max_search_start) {
if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
*match_pos = pos;
return false;
}
// Position of end of previous match.
int match_end = pos + pattern_length;
int new_pos = SimpleIndexOf(subject, pattern_string, match_end);
if (new_pos >= 0) {
// A match.
if (new_pos > match_end) {
ReplacementStringBuilder::AddSubjectSlice(builder,
match_end,
new_pos);
}
pos = new_pos;
builder->Add(pattern);
} else {
break;
}
}
break;
case SEARCH_LONG:
while (pos <= max_search_start) {
if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
*match_pos = pos;
return false;
}
int match_end = pos + pattern_length;
int new_pos = ComplexIndexOf(subject, pattern_string, match_end);
if (new_pos >= 0) {
// A match has been found.
if (new_pos > match_end) {
ReplacementStringBuilder::AddSubjectSlice(builder,
match_end,
new_pos);
}
pos = new_pos;
builder->Add(pattern);
} else {
break;
}
StringSearch<PatternChar, SubjectChar> search(pattern);
while (pos <= max_search_start) {
if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
*match_pos = pos;
return false;
}
// Position of end of previous match.
int match_end = pos + pattern_length;
int new_pos = search.Search(subject, match_end);
if (new_pos >= 0) {
// A match.
if (new_pos > match_end) {
ReplacementStringBuilder::AddSubjectSlice(builder,
match_end,
new_pos);
}
pos = new_pos;
builder->Add(pattern_string);
} else {
break;
}
}
if (pos < max_search_start) {
ReplacementStringBuilder::AddSubjectSlice(builder,
pos + pattern_length,
......@@ -2977,14 +2949,14 @@ static bool SearchStringMultiple(Handle<String> subject,
Vector<const char> subject_vector = subject->ToAsciiVector();
if (pattern->IsAsciiRepresentation()) {
if (SearchStringMultiple(subject_vector,
*pattern,
pattern->ToAsciiVector(),
*pattern,
builder,
&match_pos)) break;
} else {
if (SearchStringMultiple(subject_vector,
*pattern,
pattern->ToUC16Vector(),
*pattern,
builder,
&match_pos)) break;
}
......@@ -2992,14 +2964,14 @@ static bool SearchStringMultiple(Handle<String> subject,
Vector<const uc16> subject_vector = subject->ToUC16Vector();
if (pattern->IsAsciiRepresentation()) {
if (SearchStringMultiple(subject_vector,
*pattern,
pattern->ToAsciiVector(),
*pattern,
builder,
&match_pos)) break;
} else {
if (SearchStringMultiple(subject_vector,
*pattern,
pattern->ToUC16Vector(),
*pattern,
builder,
&match_pos)) break;
}
......@@ -4781,51 +4753,23 @@ static Object* Runtime_StringTrim(Arguments args) {
}
// Define storage for buffers declared in header file.
// TODO(lrn): Remove these when rewriting search code.
int BMBuffers::bad_char_occurrence[kBMAlphabetSize];
BMGoodSuffixBuffers BMBuffers::bmgs_buffers;
template <typename schar, typename pchar>
void FindStringIndices(Vector<const schar> subject,
Vector<const pchar> pattern,
template <typename SubjectChar, typename PatternChar>
void FindStringIndices(Vector<const SubjectChar> subject,
Vector<const PatternChar> pattern,
ZoneList<int>* indices,
unsigned int limit) {
ASSERT(limit > 0);
// Collect indices of pattern in subject, and the end-of-string index.
// Stop after finding at most limit values.
StringSearchStrategy strategy =
InitializeStringSearch(pattern, sizeof(schar) == 1);
switch (strategy) {
case SEARCH_FAIL: return;
case SEARCH_SHORT: {
int pattern_length = pattern.length();
int index = 0;
while (limit > 0) {
index = SimpleIndexOf(subject, pattern, index);
if (index < 0) return;
indices->Add(index);
index += pattern_length;
limit--;
}
return;
}
case SEARCH_LONG: {
int pattern_length = pattern.length();
int index = 0;
while (limit > 0) {
index = ComplexIndexOf(subject, pattern, index);
if (index < 0) return;
indices->Add(index);
index += pattern_length;
limit--;
}
return;
}
default:
UNREACHABLE();
return;
StringSearch<PatternChar, SubjectChar> search(pattern);
int pattern_length = pattern.length();
int index = 0;
while (limit > 0) {
index = search.Search(subject, index);
if (index < 0) return;
indices->Add(index);
index += pattern_length;
limit--;
}
}
......
This diff is collapsed.
......@@ -441,6 +441,8 @@
'../../src/spaces-inl.h',
'../../src/spaces.cc',
'../../src/spaces.h',
'../../src/string-search.cc',
'../../src/string-search.h',
'../../src/string-stream.cc',
'../../src/string-stream.h',
'../../src/stub-cache.cc',
......
......@@ -122,6 +122,7 @@
89A88E1F0E71A6B40043BA31 /* snapshot-common.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1820E719B8F00D62E90 /* snapshot-common.cc */; };
89A88E200E71A6B60043BA31 /* snapshot-empty.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1830E719B8F00D62E90 /* snapshot-empty.cc */; };
89A88E210E71A6B70043BA31 /* spaces.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1860E719B8F00D62E90 /* spaces.cc */; };
89A88E220E71A6BC0043BA31 /* string-search.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1880E719B8F00D62E90 /* string-search.cc */; };
89A88E220E71A6BC0043BA31 /* string-stream.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1880E719B8F00D62E90 /* string-stream.cc */; };
89A88E230E71A6BE0043BA31 /* stub-cache-ia32.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF18B0E719B8F00D62E90 /* stub-cache-ia32.cc */; };
89A88E240E71A6BF0043BA31 /* stub-cache.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF18C0E719B8F00D62E90 /* stub-cache.cc */; };
......@@ -183,6 +184,7 @@
89F23C730E78D5B2006B2466 /* snapshot-common.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1820E719B8F00D62E90 /* snapshot-common.cc */; };
89F23C740E78D5B2006B2466 /* snapshot-empty.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1830E719B8F00D62E90 /* snapshot-empty.cc */; };
89F23C750E78D5B2006B2466 /* spaces.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1860E719B8F00D62E90 /* spaces.cc */; };
89F23C760E78D5B2006B2466 /* string-search.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1880E719B8F00D62E90 /* string-search.cc */; };
89F23C760E78D5B2006B2466 /* string-stream.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF1880E719B8F00D62E90 /* string-stream.cc */; };
89F23C780E78D5B2006B2466 /* stub-cache.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF18C0E719B8F00D62E90 /* stub-cache.cc */; };
89F23C790E78D5B2006B2466 /* token.cc in Sources */ = {isa = PBXBuildFile; fileRef = 897FF18E0E719B8F00D62E90 /* token.cc */; };
......@@ -502,6 +504,8 @@
897FF1850E719B8F00D62E90 /* spaces-inl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "spaces-inl.h"; sourceTree = "<group>"; };
897FF1860E719B8F00D62E90 /* spaces.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spaces.cc; sourceTree = "<group>"; };
897FF1870E719B8F00D62E90 /* spaces.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = spaces.h; sourceTree = "<group>"; };
897FF1880E719B8F00D62E90 /* string-search.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "string-search.cc"; sourceTree = "<group>"; };
897FF1880E719B8F00D62E90 /* string-search.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "string-search.h"; sourceTree = "<group>"; };
897FF1880E719B8F00D62E90 /* string-stream.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "string-stream.cc"; sourceTree = "<group>"; };
897FF1890E719B8F00D62E90 /* string-stream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "string-stream.h"; sourceTree = "<group>"; };
897FF18A0E719B8F00D62E90 /* stub-cache-arm.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "stub-cache-arm.cc"; path = "arm/stub-cache-arm.cc"; sourceTree = "<group>"; };
......@@ -964,6 +968,8 @@
897FF1870E719B8F00D62E90 /* spaces.h */,
9FA38BAC1175B2D200C4CD55 /* splay-tree-inl.h */,
9FA38BAD1175B2D200C4CD55 /* splay-tree.h */,
897FF1880E719B8F00D62E90 /* string-search.cc */,
897FF1890E719B8F00D62E90 /* string-search.h */,
897FF1880E719B8F00D62E90 /* string-stream.cc */,
897FF1890E719B8F00D62E90 /* string-stream.h */,
897FF18A0E719B8F00D62E90 /* stub-cache-arm.cc */,
......@@ -1353,6 +1359,7 @@
89A88E1F0E71A6B40043BA31 /* snapshot-common.cc in Sources */,
89A88E200E71A6B60043BA31 /* snapshot-empty.cc in Sources */,
89A88E210E71A6B70043BA31 /* spaces.cc in Sources */,
89A88E220E71A6BC0043BA31 /* string-search.cc in Sources */,
89A88E220E71A6BC0043BA31 /* string-stream.cc in Sources */,
89A88E230E71A6BE0043BA31 /* stub-cache-ia32.cc in Sources */,
89A88E240E71A6BF0043BA31 /* stub-cache.cc in Sources */,
......@@ -1478,6 +1485,7 @@
89F23C730E78D5B2006B2466 /* snapshot-common.cc in Sources */,
89F23C740E78D5B2006B2466 /* snapshot-empty.cc in Sources */,
89F23C750E78D5B2006B2466 /* spaces.cc in Sources */,
89F23C760E78D5B2006B2466 /* string-search.cc in Sources */,
89F23C760E78D5B2006B2466 /* string-stream.cc in Sources */,
89F23CA00E78D609006B2466 /* stub-cache-arm.cc in Sources */,
89F23C780E78D5B2006B2466 /* stub-cache.cc in Sources */,
......
......@@ -937,6 +937,14 @@
RelativePath="..\..\src\spaces.h"
>
</File>
<File
RelativePath="..\..\src\string-search.cc"
>
</File>
<File
RelativePath="..\..\src\string-search.h"
>
</File>
<File
RelativePath="..\..\src\string-stream.cc"
>
......
......@@ -911,6 +911,14 @@
RelativePath="..\..\src\spaces.h"
>
</File>
<File
RelativePath="..\..\src\string-search.cc"
>
</File>
<File
RelativePath="..\..\src\string-search.h"
>
</File>
<File
RelativePath="..\..\src\string-stream.cc"
>
......
......@@ -897,6 +897,14 @@
RelativePath="..\..\src\spaces.h"
>
</File>
<File
RelativePath="..\..\src\string-search.cc"
>
</File>
<File
RelativePath="..\..\src\string-search.h"
>
</File>
<File
RelativePath="..\..\src\string-stream.cc"
>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment