Optimize non-ASCII string splitting with single-character search pattern

Review URL: https://chromiumcodereview.appspot.com/11299163
Patch from Ben Noordhuis <ben@c9.io>.
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13119 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

Optimize non-ASCII string splitting with single-character search pattern
Review URL: https://chromiumcodereview.appspot.com/11299163 Patch from Ben Noordhuis <ben@c9.io>. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13119 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
702cc25d · yangguo@chromium.org · 00bde585 · 702cc25d · 702cc25d
Commit 702cc25d authored Dec 03, 2012 by yangguo@chromium.org
Show whitespace changes
Inline Side-by-side

Showing with 64 additions and 12 deletions

runtime.cc src/runtime.cc +47 -12

string-split.js test/mjsunit/string-split.js +17 -0

No files found.
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -2762,6 +2762,23 @@ void FindAsciiStringIndices(Vector<const char> subject,
 }
+void FindTwoByteStringIndices(const Vector<const uc16> subject,  // text to scan
+                              uc16 pattern,  // the single uc16 value to match
+                              ZoneList<int>* indices,  // out: offset of each match
+                              unsigned int limit,  // max number of matches to record
+                              Zone* zone) {  // forwarded to indices->Add
+  ASSERT(limit > 0);  // precondition: limit must be nonzero
+  const uc16* subject_start = subject.start();
+  const uc16* subject_end = subject_start + subject.length();  // one past the end
+  for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
+    if (*pos == pattern) {  // plain element-by-element equality test
+      indices->Add(static_cast<int>(pos - subject_start), zone);  // offset from start
+      limit--;  // the loop condition ends the scan once this reaches 0
+    }
+  }
+}
 template <typename SubjectChar, typename PatternChar>
 void FindStringIndices(Isolate* isolate,
                       Vector<const SubjectChar> subject,
@@ -2826,22 +2843,40 @@ void FindStringIndicesDispatch(Isolate* isolate,
    } else {
      Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
      if (pattern_content.IsAscii()) {
+        Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
+        if (pattern_vector.length() == 1) {
+          FindTwoByteStringIndices(subject_vector,
+                                   pattern_vector[0],
+                                   indices,
+                                   limit,
+                                   zone);
+        } else {
          FindStringIndices(isolate,
                            subject_vector,
-                          pattern_content.ToAsciiVector(),
+                            pattern_vector,
+                            indices,
+                            limit,
+                            zone);
+        }
+      } else {
+        Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
+        if (pattern_vector.length() == 1) {
+          FindTwoByteStringIndices(subject_vector,
+                                   pattern_vector[0],
                                   indices,
                                   limit,
                                   zone);
        } else {
          FindStringIndices(isolate,
                            subject_vector,
-                          pattern_content.ToUC16Vector(),
+                            pattern_vector,
                            indices,
                            limit,
                            zone);
        }
      }
    }
+  }
 }

--- a/test/mjsunit/string-split.js
+++ b/test/mjsunit/string-split.js
@@ -66,6 +66,23 @@ assertArrayEquals(["div", "#i", "d", ".class"], "div#id.class".split(/(?=[d#.])/
 assertArrayEquals(["a", "b", "c"], "abc".split(/(?=.)/));
+assertArrayEquals(["Wenige", "sind", "auserwählt."],
+                  "Wenige sind auserwählt.".split(" "));  // no limit argument
+assertArrayEquals([], "Wenige sind auserwählt.".split(" ", 0));  // limit 0: empty result
+assertArrayEquals(["Wenige"], "Wenige sind auserwählt.".split(" ", 1));  // limit 1
+assertArrayEquals(["Wenige", "sind"], "Wenige sind auserwählt.".split(" ", 2));  // limit 2
+assertArrayEquals(["Wenige", "sind", "auserwählt."],
+                  "Wenige sind auserwählt.".split(" ", 3));  // limit equals the piece count
+assertArrayEquals(["Wenige sind auserw", "hlt."],
+                  "Wenige sind auserwählt.".split("ä"));  // one-character non-ASCII separator
+assertArrayEquals(["Wenige sind ", "."],
+                  "Wenige sind auserwählt.".split("auserwählt"));  // multi-character separator
 /* "ab".split(/((?=.))/)
 *