Commit 702cc25d authored by yangguo@chromium.org

Optimize non-ASCII string splitting with single-character search pattern

Review URL: https://chromiumcodereview.appspot.com/11299163
Patch from Ben Noordhuis <ben@c9.io>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13119 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 00bde585
...@@ -2762,6 +2762,23 @@ void FindAsciiStringIndices(Vector<const char> subject, ...@@ -2762,6 +2762,23 @@ void FindAsciiStringIndices(Vector<const char> subject,
} }
void FindTwoByteStringIndices(const Vector<const uc16> subject,
uc16 pattern,
ZoneList<int>* indices,
unsigned int limit,
Zone* zone) {
ASSERT(limit > 0);
const uc16* subject_start = subject.start();
const uc16* subject_end = subject_start + subject.length();
for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
if (*pos == pattern) {
indices->Add(static_cast<int>(pos - subject_start), zone);
limit--;
}
}
}
template <typename SubjectChar, typename PatternChar> template <typename SubjectChar, typename PatternChar>
void FindStringIndices(Isolate* isolate, void FindStringIndices(Isolate* isolate,
Vector<const SubjectChar> subject, Vector<const SubjectChar> subject,
...@@ -2826,22 +2843,40 @@ void FindStringIndicesDispatch(Isolate* isolate, ...@@ -2826,22 +2843,40 @@ void FindStringIndicesDispatch(Isolate* isolate,
} else { } else {
Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (pattern_content.IsAscii()) { if (pattern_content.IsAscii()) {
Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
if (pattern_vector.length() == 1) {
FindTwoByteStringIndices(subject_vector,
pattern_vector[0],
indices,
limit,
zone);
} else {
FindStringIndices(isolate, FindStringIndices(isolate,
subject_vector, subject_vector,
pattern_content.ToAsciiVector(), pattern_vector,
indices,
limit,
zone);
}
} else {
Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
if (pattern_vector.length() == 1) {
FindTwoByteStringIndices(subject_vector,
pattern_vector[0],
indices, indices,
limit, limit,
zone); zone);
} else { } else {
FindStringIndices(isolate, FindStringIndices(isolate,
subject_vector, subject_vector,
pattern_content.ToUC16Vector(), pattern_vector,
indices, indices,
limit, limit,
zone); zone);
} }
} }
} }
}
} }
......
...@@ -66,6 +66,23 @@ assertArrayEquals(["div", "#i", "d", ".class"], "div#id.class".split(/(?=[d#.])/ ...@@ -66,6 +66,23 @@ assertArrayEquals(["div", "#i", "d", ".class"], "div#id.class".split(/(?=[d#.])/
assertArrayEquals(["a", "b", "c"], "abc".split(/(?=.)/)); assertArrayEquals(["a", "b", "c"], "abc".split(/(?=.)/));
// Splitting a subject that contains a non-ASCII character ("ä") on a
// single-character ASCII separator, without a limit.
assertArrayEquals(["Wenige", "sind", "auserwählt."],
"Wenige sind auserwählt.".split(" "));
// Same subject and separator with explicit limits 0 through 3; the result
// holds at most `limit` pieces.
assertArrayEquals([], "Wenige sind auserwählt.".split(" ", 0));
assertArrayEquals(["Wenige"], "Wenige sind auserwählt.".split(" ", 1));
assertArrayEquals(["Wenige", "sind"], "Wenige sind auserwählt.".split(" ", 2));
assertArrayEquals(["Wenige", "sind", "auserwählt."],
"Wenige sind auserwählt.".split(" ", 3));
// Single-character separator that is itself non-ASCII.
assertArrayEquals(["Wenige sind auserw", "hlt."],
"Wenige sind auserwählt.".split("ä"));
// Multi-character separator containing a non-ASCII character.
assertArrayEquals(["Wenige sind ", "."],
"Wenige sind auserwählt.".split("auserwählt"));
/* "ab".split(/((?=.))/) /* "ab".split(/((?=.))/)
* *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment