// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Edge cases that unibrow got wrong: a cased letter pair checked in both
// directions, and a capital sigma on its own.
{
  const capitalLetter = "𐐘";
  const smallLetter = "𐑀";
  assertEquals(capitalLetter, smallLetter.toUpperCase());
  assertEquals(smallLetter, capitalLetter.toLowerCase());

  const capitalSigma = "Σ";
  assertEquals("σ", capitalSigma.toLowerCase());
}

// Exercise several different paths in the ICU case conversion fastpath.
{
  // Two capital sigmas in a row.
  const doubleSigma = "\u03A3\u03A3";
  assertEquals("σς", doubleSigma.toLowerCase());

  // Sharp s (U+00DF) is expanded in the latin1 fastpath.
  const withSharpS = "A\u00DFB";
  assertEquals("ASSB", withSharpS.toUpperCase());

  const mixedCaseAscii = "Ab";
  assertEquals("AB", mixedCaseAscii.toUpperCase());
}
// Find first uppercase in fastpath
// Input length < a machine word size
assertEquals("ab", "ab".toLowerCase());
assertEquals("ab", "aB".toLowerCase());
assertEquals("AÜ", "aü".toUpperCase());
assertEquals("AÜ", "AÜ".toUpperCase());
// All four upper/lower combinations of a two-character Latin-1 input.
assertEquals("aü", "aü".toLowerCase());
assertEquals("aü", "aÜ".toLowerCase());
assertEquals("aü", "AÜ".toLowerCase());
assertEquals("aü", "Aü".toLowerCase());

// Input length >= a machine word size
assertEquals("abcdefghij", "abcdefghij".toLowerCase());
assertEquals("abcdefghij", "abcdefghiJ".toLowerCase());
assertEquals("abçdefghij", "abçdefghiJ".toLowerCase());
assertEquals("abçdefghij", "abÇdefghiJ".toLowerCase());
assertEquals("abcdefghiá", "abcdeFghiá".toLowerCase());
assertEquals("abcdefghiá", "abcdeFghiÁ".toLowerCase());

assertEquals("ABCDEFGHIJ", "ABCDEFGHIJ".toUpperCase());
assertEquals("ABCDEFGHIJ", "ABCDEFGHIj".toUpperCase());
assertEquals("ABÇDEFGHIJ", "ABÇDEFGHIj".toUpperCase());
assertEquals("ABÇDEFGHIJ", "ABçDEFGHIj".toUpperCase());
assertEquals("ABCDEFGHIÁ", "ABCDEfGHIÁ".toUpperCase());
assertEquals("ABCDEFGHIÁ", "ABCDEfGHIá".toUpperCase());


// Starts with fastpath, but switches to full Unicode path: both inputs are
// Latin-1 strings whose uppercase form contains a character above U+00FF.
// U+00FF is uppercased to U+0178.
assertEquals("AŸ", "aÿ".toUpperCase());
// U+00B5 (µ) is uppercased to U+039C (Μ)
assertEquals("AΜ", "aµ".toUpperCase());

// Buffer size increase: the uppercased result is longer than the input
// because sharp s and the Latin ligatures expand to several letters.
assertEquals("CSSBẶ", "cßbặ".toUpperCase());
assertEquals("FIFLFFIFFL", "\uFB01\uFB02\uFB03\uFB04".toUpperCase());
assertEquals("ABCÀCSSA", "abcàcßa".toUpperCase());
assertEquals("ABCDEFGHIÀCSSA", "ABCDEFGHIàcßa".toUpperCase());
assertEquals("ABCDEFGHIÀCSSA", "abcdeFghiàcßa".toUpperCase());

// OneByte input with buffer size increase: non-fast path
assertEquals("ABCSS", "abCß".toLocaleUpperCase("tr"));

// More comprehensive tests for "tr", "az" and "lt" are in
// test262/intl402/Strings/*

// Buffer size decrease with a single locale or locale list.
// In Turkic (tr, az), U+0307 preceded by Capital Letter I is dropped.
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("tr"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("az"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase(["tr", "en"]));

// Cons string: the inputs are built by concatenation, and the Turkic
// "I + U+0307" sequence is split across the concatenated pieces.
assertEquals("abcijkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("tr"));
assertEquals("abcijkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("tr"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("fil"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLowerCase());
assertEquals("abci\u0307jkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLowerCase());

// String wrapper object whose value comes from stringifying an ArrayBuffer.
assertEquals("[object arraybuffer]",
    (new String(new ArrayBuffer())).toLocaleLowerCase("fil"));
assertEquals("[OBJECT ARRAYBUFFER]",
    (new String(new ArrayBuffer())).toLocaleUpperCase("fil"));

// ASCII-only concatenated input through every case-mapping entry point.
assertEquals("abcde", ("a" + "b" + "cde").toLowerCase());
assertEquals("ABCDE", ("a" + "b" + "cde").toUpperCase());
assertEquals("abcde", ("a" + "b" + "cde").toLocaleLowerCase());
assertEquals("ABCDE", ("a" + "b" + "cde").toLocaleUpperCase());
assertEquals("abcde", ("a" + "b" + "cde").toLocaleLowerCase("en"));
assertEquals("ABCDE", ("a" + "b" + "cde").toLocaleUpperCase("en"));
assertEquals("abcde", ("a" + "b" + "cde").toLocaleLowerCase("fil"));
assertEquals("ABCDE", ("a" + "b" + "cde").toLocaleUpperCase("fil"));
assertEquals("abcde", ("a" + "b" + "cde").toLocaleLowerCase("longlang"));
assertEquals("ABCDE", ("a" + "b" + "cde").toLocaleUpperCase("longlang"));

// Locale-argument semantics, checked by comparing results against each other
// rather than against fixed strings.
{
  const dottedCapitalI = "aBcI\u0307";
  const accentedGreek = "άόύώ";

  // "tr" and "az" should behave identically.
  assertEquals(dottedCapitalI.toLocaleLowerCase("tr"),
               dottedCapitalI.toLocaleLowerCase("az"));

  // What matters is the first locale in the locale list.
  assertEquals(dottedCapitalI.toLocaleLowerCase(["tr", "en", "fr"]),
               dottedCapitalI.toLocaleLowerCase("tr"));
  assertEquals(dottedCapitalI.toLocaleLowerCase(["en", "tr", "az"]),
               dottedCapitalI.toLocaleLowerCase("en"));
  assertEquals(dottedCapitalI.toLocaleLowerCase(["en", "tr", "az"]),
               dottedCapitalI.toLowerCase());

  // An empty locale list is the same as the default locale. Try these tests
  // under Turkish and Greek locale.
  assertEquals(dottedCapitalI.toLocaleLowerCase([]),
               dottedCapitalI.toLocaleLowerCase());
  assertEquals(dottedCapitalI.toLocaleLowerCase([]),
               dottedCapitalI.toLocaleLowerCase(Intl.GetDefaultLocale));
  assertEquals(accentedGreek.toLocaleUpperCase([]),
               accentedGreek.toLocaleUpperCase());
  assertEquals(accentedGreek.toLocaleUpperCase([]),
               accentedGreek.toLocaleUpperCase(Intl.GetDefaultLocale));
}


// English/root locale keeps U+0307 (combining dot above).
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en-GB"));
// "en" comes first in the list, so the trailing "tr" has no effect.
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["en", "tr"]));
assertEquals("abci\u0307", "aBcI\u0307".toLowerCase());

// Anything other than 'tr' and 'az' behave like root for U+0307.
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("fil"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("zh-Hant-TW"));
// NOTE(review): "i-klingon", "i-enochian" and "x-foobar" are grandfathered /
// private-use-only tags. Engines that validate tags against the Unicode
// locale identifier grammar may reject them with a RangeError instead --
// confirm against the engine version this test targets.
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("i-klingon"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("i-enochian"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("x-foobar"));

// Up to 8 chars are allowed for the primary language tag in BCP 47, so the
// 8-letter "longlang" is accepted while the 9-letter "longlang2" raises a
// RangeError. Each case is tried as a single tag and as the head of a list.
for (const acceptedLocales of ["longlang", ["longlang", "tr"]]) {
  assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(acceptedLocales));
  assertEquals("ABCI\u0307", "aBcI\u0307".toLocaleUpperCase(acceptedLocales));
}
for (const rejectedLocales of ["longlang2", ["longlang2", "en"]]) {
  assertThrows(() => "abc".toLocaleLowerCase(rejectedLocales), RangeError);
  assertThrows(() => "abc".toLocaleUpperCase(rejectedLocales), RangeError);
}

142
// Greek uppercasing: not covered by intl402/String/*, yet. Tonos (U+0301) and
143 144 145
// other diacritic marks are dropped.  See
// http://bugs.icu-project.org/trac/ticket/5456#comment:19 for more examples.
// See also http://bugs.icu-project.org/trac/ticket/12845 .
146 147 148 149 150
assertEquals("Α", \u0301".toLocaleUpperCase("el"));
assertEquals("Α", \u0301".toLocaleUpperCase("el-GR"));
assertEquals("Α", \u0301".toLocaleUpperCase("el-Grek"));
assertEquals("Α", \u0301".toLocaleUpperCase("el-Grek-GR"));
assertEquals("Α", "ά".toLocaleUpperCase("el"));
151 152 153
assertEquals("ΑΟΫΩ", "άόύώ".toLocaleUpperCase("el"));
assertEquals("ΑΟΫΩ", \u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("el"));
assertEquals("ΑΟΫΩ", "άόύώ".toLocaleUpperCase("el"));
154 155
assertEquals("ΟΕ", \u1f15".toLocaleUpperCase("el"));
assertEquals("ΟΕ", \u0301ε\u0314\u0301".toLocaleUpperCase("el"));
156 157 158
assertEquals("ΡΩΜΕΪΚΑ", "ρωμέικα".toLocaleUpperCase("el"));
assertEquals("ΜΑΪΟΥ, ΤΡΟΛΕΪ", "Μαΐου, τρόλεϊ".toLocaleUpperCase("el"));
assertEquals("ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.", "Το ένα ή το άλλο.".toLocaleUpperCase("el"));
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196

// Input and output are identical: every string below is already in the
// requested case.
assertEquals("αβγδε", "αβγδε".toLocaleLowerCase("el"));
for (const alreadyUppercase of [
  "ΑΒΓΔΕ",
  "ΑΒΓΔΕАБ𝐀𝐁",
  "ABCDEÂÓḴ123",
  // ASCII-only or Latin-1 only: 1-byte
  "ABCDE123",
  "ABCDEÂÓ123",
]) {
  assertEquals(alreadyUppercase, alreadyUppercase.toLocaleUpperCase("el"));
}

// To make sure that the input string is not overwritten in place: split each
// input into an array before uppercasing, then compare the rejoined array
// with the input afterwards.
var strings = ["abCdef", "αβγδε", "άόύώ", "аб"];
strings.forEach(function (input) {
  const unitsBeforeCall = input.split("");
  // The result is deliberately discarded; only the receiver is inspected.
  input.toLocaleUpperCase("el");
  assertEquals(input, unitsBeforeCall.join(""));
});

// In other locales, U+0301 is preserved.
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
             "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("en"));
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
             "α\u0301ο\u0301υ\u0301ω\u0301".toUpperCase());

// Plane 1; Deseret and Warang Citi Script.
assertEquals("\u{10400}\u{118A0}", "\u{10428}\u{118C0}".toUpperCase());
assertEquals("\u{10428}\u{118C0}", "\u{10400}\u{118A0}".toLowerCase());
// Mathematical Bold {Capital, Small} Letter A do not change.
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toUpperCase());
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toLowerCase());
// Plane 1; New characters in Unicode 8.0
assertEquals("\u{10C80}", "\u{10CC0}".toUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLowerCase());
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());
// Same pair through the locale-list entry point, in both directions.
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase(["tr"]));

// Check the fast path for the Latin-1 supplement (U+00A0 ~ U+00FF). The input
// and both expected strings are spelled out in full and compared against the
// locale-independent and locale-sensitive conversions.
var latin1Suppl = "\u00A0¡¢£¤¥¦§¨©ª«¬\u00AD®°±²³´µ¶·¸¹º»¼½¾¿" +
    "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
var latin1SupplLowercased = "\u00A0¡¢£¤¥¦§¨©ª«¬\u00AD®°±²³´µ¶·¸¹º»¼½¾¿" +
    "àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
var latin1SupplUppercased = "\u00A0¡¢£¤¥¦§¨©ª«¬\u00AD®°±²³´\u039C¶·¸¹º»¼½¾¿" +
    "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞSSÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞ\u0178";

// Locale-independent conversions.
assertEquals(latin1SupplLowercased, latin1Suppl.toLowerCase());
assertEquals(latin1SupplUppercased, latin1Suppl.toUpperCase());

// Locales checked lowercase first, then uppercase.
for (const localeTag of ["de", "el"]) {
  assertEquals(latin1SupplLowercased, latin1Suppl.toLocaleLowerCase(localeTag));
  assertEquals(latin1SupplUppercased, latin1Suppl.toLocaleUpperCase(localeTag));
}

// Locales checked uppercase first, then lowercase.
for (const localeTag of ["tr", "az"]) {
  assertEquals(latin1SupplUppercased, latin1Suppl.toLocaleUpperCase(localeTag));
  assertEquals(latin1SupplLowercased, latin1Suppl.toLocaleLowerCase(localeTag));
}

assertEquals(latin1SupplUppercased, latin1Suppl.toLocaleUpperCase("lt"));

// Lithuanian need to have a dot-above for U+00CC(Ì) and U+00CD(Í) when
// lowercasing.
{
  const lithuanianLowercased = "\u00A0¡¢£¤¥¦§¨©ª«¬\u00AD®°±²³´µ¶·¸¹º»¼½¾¿" +
      "àáâãäåæçèéêëi\u0307\u0300i\u0307\u0301îïðñòóôõö×øùúûüýþß" +
      "àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
  assertEquals(lithuanianLowercased, latin1Suppl.toLocaleLowerCase("lt"));
}