Fixed problem where the two lower-case sigmas would uncanonicalize to themselves and upper-case sigma, but upper-case sigma would uncanonicalize to just lower-case final sigma.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@844 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

Fixed problem where the two lower-case sigmas would uncanonicalize to
themselves and upper-case sigma, but upper-case sigma would uncanonicalize
to just lower-case final sigma.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@844 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
144c8c79 · christian.plesner.hansen@gmail.com · 02a4aeeb · 144c8c79 · 144c8c79 · 144c8c79
Commit 144c8c79 authored Nov 26, 2008 by christian.plesner.hansen@gmail.com
Hide whitespace changes
Inline Side-by-side

Showing with 25 additions and 9 deletions

jsregexp.h src/jsregexp.h +1 -1

unicode.cc src/unicode.cc +7 -7

test-regexp.cc test/cctest/test-regexp.cc +17 -1

No files found.
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -206,7 +206,7 @@ class CharacterRange {
  bool is_valid() { return from_ <= to_; }
  bool IsSingleton() { return (from_ == to_); }
  void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
-  static const int kRangeCanonicalizeMax = 0x200;
+  static const int kRangeCanonicalizeMax = 0x346;
  static const int kStartMarker = (1 << 24);
  static const int kPayloadMask = (1 << 24) - 1;
 private:

--- a/src/unicode.cc
+++ b/src/unicode.cc
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@@ -1086,7 +1086,7 @@ TEST(RangeCanonicalization) {
    unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int first_length = un_canonicalize.get(next_block, '\0', first);
    for (unsigned i = 1; i < dist; i++) {
-      CHECK_EQ(i, CanonRange(i));
+      CHECK_EQ(i, CanonRange(next_block + i));
      unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      int succ_length = un_canonicalize.get(next_block + i, '\0', succ);
      CHECK_EQ(first_length, succ_length);
@@ -1101,6 +1101,22 @@ TEST(RangeCanonicalization) {
 }
+TEST(UncanonicalizeEquivalence) {  // Each entry reported for a value must itself report the identical result.
+  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
+  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+  for (int i = 0; i < (1 << 16); i++) {  // Walks every value in [0, 0xFFFF].
+    int length = un_canonicalize.get(i, '\0', chars);  // Used below as the number of entries in chars.
+    for (int j = 0; j < length; j++) {
+      unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+      int length2 = un_canonicalize.get(chars[j], '\0', chars2);  // Re-query with each reported entry.
+      CHECK_EQ(length, length2);
+      for (int k = 0; k < length; k++)  // Compared position by position, so ordering must agree too.
+        CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
+    }
+  }
+}
 static void TestRangeCaseIndependence(CharacterRange input,
                                      Vector<CharacterRange> expected) {
  ZoneScope zone_scope(DELETE_ON_EXIT);