Commit 702cc25d authored by yangguo@chromium.org

Optimize non-ASCII string splitting with single-character search pattern

Review URL: https://chromiumcodereview.appspot.com/11299163
Patch from Ben Noordhuis <ben@c9.io>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13119 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 00bde585
......@@ -2762,6 +2762,23 @@ void FindAsciiStringIndices(Vector<const char> subject,
}
void FindTwoByteStringIndices(const Vector<const uc16> subject,
uc16 pattern,
ZoneList<int>* indices,
unsigned int limit,
Zone* zone) {
ASSERT(limit > 0);
const uc16* subject_start = subject.start();
const uc16* subject_end = subject_start + subject.length();
for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
if (*pos == pattern) {
indices->Add(static_cast<int>(pos - subject_start), zone);
limit--;
}
}
}
template <typename SubjectChar, typename PatternChar>
void FindStringIndices(Isolate* isolate,
Vector<const SubjectChar> subject,
......@@ -2826,19 +2843,37 @@ void FindStringIndicesDispatch(Isolate* isolate,
} else {
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (pattern_content.IsAscii()) {
FindStringIndices(isolate,
subject_vector,
pattern_content.ToAsciiVector(),
indices,
limit,
zone);
Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
if (pattern_vector.length() == 1) {
FindTwoByteStringIndices(subject_vector,
pattern_vector[0],
indices,
limit,
zone);
} else {
FindStringIndices(isolate,
subject_vector,
pattern_vector,
indices,
limit,
zone);
}
} else {
FindStringIndices(isolate,
subject_vector,
pattern_content.ToUC16Vector(),
indices,
limit,
zone);
Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
if (pattern_vector.length() == 1) {
FindTwoByteStringIndices(subject_vector,
pattern_vector[0],
indices,
limit,
zone);
} else {
FindStringIndices(isolate,
subject_vector,
pattern_vector,
indices,
limit,
zone);
}
}
}
}
......
......@@ -66,6 +66,23 @@ assertArrayEquals(["div", "#i", "d", ".class"], "div#id.class".split(/(?=[d#.])/
// A zero-width lookahead separator splits between every character.
assertArrayEquals(["a", "b", "c"], "abc".split(/(?=.)/));

// Subject containing a non-ASCII character ("ä"), split on a single ASCII
// character, without a limit and with limits 0 through 3.
assertArrayEquals(["Wenige", "sind", "auserwählt."],
                  "Wenige sind auserwählt.".split(" "));
assertArrayEquals([], "Wenige sind auserwählt.".split(" ", 0));
assertArrayEquals(["Wenige"], "Wenige sind auserwählt.".split(" ", 1));
assertArrayEquals(["Wenige", "sind"], "Wenige sind auserwählt.".split(" ", 2));
assertArrayEquals(["Wenige", "sind", "auserwählt."],
                  "Wenige sind auserwählt.".split(" ", 3));

// Same subject, split on a single non-ASCII character. The separator must be
// the one-character string "ä" for this case to use a one-character pattern.
assertArrayEquals(["Wenige sind auserw", "hlt."],
                  "Wenige sind auserwählt.".split("ä"));

// Same subject, split on a multi-character separator that contains "ä".
assertArrayEquals(["Wenige sind ", "."],
                  "Wenige sind auserwählt.".split("auserwählt"));
/* "ab".split(/((?=.))/)
*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment