Commit b348d47b authored by jshin's avatar jshin Committed by Commit bot

Use ICU case conversion/transliterator for case conversion

When I18N is enabled, use ICU's case conversion API and transliteration
API [1] to implement String.prototype.to{Upper,Lower}Case and
String.prototype.toLocale{Upper,Lower}Case.

* ICU-based case conversion was implemented in runtime-i18n.cc/i18n.js
* The above 4 functions are overridden with those in i18n.js when
  --icu_case_mapping flag is turned on. To control the override by the flag,
  they're overriden in icu-case-mapping.js

Previously, toLocale{U,L}Case just called to{U,L}Case so that they didn't
support locale-sensitive case conversion for Turkic languages (az, tr),
Greek (el) and Lithuanian (lt).

Before ICU APIs for the most general case are called, a fast-path for Latin-1
is tried. It's taken from Blink and adopted as necessary. This fast path
is always tried for to{U,L}Case. For toLocale{U,L}Case, it's only taken
when a locale (explicitly specified or default) is not in {az, el, lt, tr}.

With these changes, a build with --icu_case_mapping=true passes a bunch
of tests in test262/intl402/Strings/* and intl/* that failed before.

Handling of pure ASCII strings (aligned at word boundary) are not as fast
as Unibrow's implementation that uses word-by-word case conversion. OTOH,
Latin-1 input handling is faster than Unibrow. General Unicode input
handling is slower but more accurate.

See https://docs.google.com/spreadsheets/d/1KJCJxKc1FxFXjwmYqABS0_2cNdPetvnd8gY8_HGSbrg/edit?usp=sharing for the benchmark.

This CL started with http://crrev.com/1544023002#ps200001 by littledan@,
but has changed significantly since.

[1] See why transliteration API is needed for uppercasing in Greek.
    http://bugs.icu-project.org/trac/ticket/10582

R=yangguo
BUG=v8:4476,v8:4477
LOG=Y
TEST=test262/{built-ins,intl402}/Strings/*, webkit/fast/js/*, mjsunit/string-case,
     intl/general/case*

Review-Url: https://codereview.chromium.org/1812673005
Cr-Commit-Position: refs/heads/master@{#36187}
parent adcc5119
......@@ -428,6 +428,10 @@ action("js2c_experimental") {
"$target_gen_dir/experimental-libraries.cc",
]
if (v8_enable_i18n_support) {
sources += [ "src/js/icu-case-mapping.js" ]
}
args = [
rebase_path("$target_gen_dir/experimental-libraries.cc",
root_build_dir),
......
......@@ -2478,6 +2478,9 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_instanceof)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_restrictive_declarations)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_exponentiation_operator)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_string_padding)
#ifdef V8_I18N_SUPPORT
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(icu_case_mapping)
#endif
void InstallPublicSymbol(Factory* factory, Handle<Context> native_context,
const char* name, Handle<Symbol> value) {
......@@ -3046,6 +3049,10 @@ bool Genesis::InstallExperimentalNatives() {
static const char* harmony_exponentiation_operator_natives[] = {nullptr};
static const char* harmony_string_padding_natives[] = {
"native harmony-string-padding.js", nullptr};
#ifdef V8_I18N_SUPPORT
static const char* icu_case_mapping_natives[] = {"native icu-case-mapping.js",
nullptr};
#endif
for (int i = ExperimentalNatives::GetDebuggerCount();
i < ExperimentalNatives::GetBuiltinsCount(); i++) {
......
......@@ -193,12 +193,22 @@ DEFINE_IMPLICATION(es_staging, harmony_regexp_lookbehind)
DEFINE_IMPLICATION(es_staging, move_object_start)
// Features that are still work in progress (behind individual flags).
#ifdef V8_I18N_SUPPORT
#define HARMONY_INPROGRESS(V) \
V(harmony_function_sent, "harmony function.sent") \
V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \
V(harmony_simd, "harmony simd") \
V(harmony_do_expressions, "harmony do-expressions") \
V(harmony_regexp_property, "harmony unicode regexp property classes") \
V(icu_case_mapping, "case mapping with ICU rather than Unibrow")
#else
#define HARMONY_INPROGRESS(V) \
V(harmony_function_sent, "harmony function.sent") \
V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \
V(harmony_simd, "harmony simd") \
V(harmony_do_expressions, "harmony do-expressions") \
V(harmony_regexp_property, "harmony unicode regexp property classes")
#endif
// Features that are complete (but still behind --harmony/es-staging flag).
#define HARMONY_STAGED(V) \
......
......@@ -142,6 +142,13 @@ var AVAILABLE_LOCALES = {
*/
var DEFAULT_ICU_LOCALE = UNDEFINED;
function GetDefaultICULocaleJS() {
if (IS_UNDEFINED(DEFAULT_ICU_LOCALE)) {
DEFAULT_ICU_LOCALE = %GetDefaultICULocale();
}
return DEFAULT_ICU_LOCALE;
}
/**
* Unicode extension regular expression.
*/
......@@ -446,11 +453,7 @@ function lookupMatcher(service, requestedLocales) {
}
// Didn't find a match, return default.
if (IS_UNDEFINED(DEFAULT_ICU_LOCALE)) {
DEFAULT_ICU_LOCALE = %GetDefaultICULocale();
}
return {'locale': DEFAULT_ICU_LOCALE, 'extension': '', 'position': -1};
return {'locale': GetDefaultICULocaleJS(), 'extension': '', 'position': -1};
}
......@@ -722,21 +725,24 @@ function toTitleCaseTimezoneLocation(location) {
*/
function canonicalizeLanguageTag(localeID) {
// null is typeof 'object' so we have to do extra check.
if (typeof localeID !== 'string' && typeof localeID !== 'object' ||
if ((!IS_STRING(localeID) && !IS_RECEIVER(localeID)) ||
IS_NULL(localeID)) {
throw MakeTypeError(kLanguageID);
}
// Optimize for the most common case; a language code alone in
// the canonical form/lowercase (e.g. "en", "fil").
if (IS_STRING(localeID) &&
!IS_NULL(InternalRegExpMatch(/^[a-z]{2,3}$/, localeID))) {
return localeID;
}
var localeString = GlobalString(localeID);
if (isValidLanguageTag(localeString) === false) {
throw MakeRangeError(kInvalidLanguageTag, localeString);
}
// This call will strip -kn but not -kn-true extensions.
// ICU bug filled - http://bugs.icu-project.org/trac/ticket/9265.
// TODO(cira): check if -u-kn-true-kc-true-kh-true still throws after
// upgrade to ICU 4.9.
var tag = %CanonicalizeLanguageTag(localeString);
if (tag === 'invalid-tag') {
throw MakeRangeError(kInvalidLanguageTag, localeString);
......@@ -1989,6 +1995,37 @@ function cachedOrNewService(service, locales, options, defaults) {
return new savedObjects[service](locales, useOptions);
}
function LocaleConvertCase(s, locales, isToUpper) {
// ECMA 402 section 13.1.2 steps 1 through 12.
var language;
// Optimize for the most common two cases. initializeLocaleList() can handle
// them as well, but it's rather slow accounting for over 60% of
// toLocale{U,L}Case() and about 40% of toLocale{U,L}Case("<locale>").
if (IS_UNDEFINED(locales)) {
language = GetDefaultICULocaleJS();
} else if (IS_STRING(locales)) {
language = canonicalizeLanguageTag(locales);
} else {
var locales = initializeLocaleList(locales);
language = locales.length > 0 ? locales[0] : GetDefaultICULocaleJS();
}
// StringSplit is slower than this.
var pos = %_Call(StringIndexOf, language, '-');
if (pos != -1) {
language = %_Call(StringSubstring, language, 0, pos);
}
var CUSTOM_CASE_LANGUAGES = ['az', 'el', 'lt', 'tr'];
var langIndex = %_Call(ArrayIndexOf, CUSTOM_CASE_LANGUAGES, language);
if (langIndex == -1) {
// language-independent case conversion.
return isToUpper ? %StringToUpperCaseI18N(s) : %StringToLowerCaseI18N(s);
}
return %StringLocaleConvertCase(s, isToUpper,
CUSTOM_CASE_LANGUAGES[langIndex]);
}
/**
* Compares this and that, and returns less than 0, 0 or greater than 0 value.
* Overrides the built-in method.
......@@ -2041,6 +2078,56 @@ OverrideFunction(GlobalString.prototype, 'normalize', function() {
}
);
function ToLowerCaseI18N() {
if (!IS_UNDEFINED(new.target)) {
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLowerCase");
var s = TO_STRING(this);
return %StringToLowerCaseI18N(s);
}
function ToUpperCaseI18N() {
if (!IS_UNDEFINED(new.target)) {
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toUpperCase");
var s = TO_STRING(this);
return %StringToUpperCaseI18N(s);
}
function ToLocaleLowerCaseI18N(locales) {
if (!IS_UNDEFINED(new.target)) {
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleLowerCase");
return LocaleConvertCase(TO_STRING(this), locales, false);
}
%FunctionSetLength(ToLocaleLowerCaseI18N, 0);
function ToLocaleUpperCaseI18N(locales) {
if (!IS_UNDEFINED(new.target)) {
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleUpperCase");
return LocaleConvertCase(TO_STRING(this), locales, true);
}
%FunctionSetLength(ToLocaleUpperCaseI18N, 0);
%FunctionRemovePrototype(ToLowerCaseI18N);
%FunctionRemovePrototype(ToUpperCaseI18N);
%FunctionRemovePrototype(ToLocaleLowerCaseI18N);
%FunctionRemovePrototype(ToLocaleUpperCaseI18N);
utils.Export(function(to) {
to.ToLowerCaseI18N = ToLowerCaseI18N;
to.ToUpperCaseI18N = ToUpperCaseI18N;
to.ToLocaleLowerCaseI18N = ToLocaleLowerCaseI18N;
to.ToLocaleUpperCaseI18N = ToLocaleUpperCaseI18N;
});
/**
* Formats a Number object (this) using locale and options values.
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
(function(global, utils) {
"use strict";
%CheckIsBootstrapping();
var GlobalString = global.String;
var OverrideFunction = utils.OverrideFunction;
var ToLowerCaseI18N = utils.ImportNow("ToLowerCaseI18N");
var ToUpperCaseI18N = utils.ImportNow("ToUpperCaseI18N");
var ToLocaleLowerCaseI18N = utils.ImportNow("ToLocaleLowerCaseI18N");
var ToLocaleUpperCaseI18N = utils.ImportNow("ToLocaleUpperCaseI18N");
OverrideFunction(GlobalString.prototype, 'toLowerCase', ToLowerCaseI18N, true);
OverrideFunction(GlobalString.prototype, 'toUpperCase', ToUpperCaseI18N, true);
OverrideFunction(GlobalString.prototype, 'toLocaleLowerCase',
ToLocaleLowerCaseI18N, true);
OverrideFunction(GlobalString.prototype, 'toLocaleUpperCase',
ToLocaleUpperCaseI18N, true);
})
......@@ -208,7 +208,11 @@ function PostNatives(utils) {
"SetIteratorNext",
"SetValues",
"SymbolToString",
"ToLocaleLowerCaseI18N",
"ToLocaleUpperCaseI18N",
"ToLowerCaseI18N",
"ToPositiveInteger",
"ToUpperCaseI18N",
// From runtime:
"is_concat_spreadable_symbol",
"iterator_symbol",
......
......@@ -8645,26 +8645,26 @@ class String: public Name {
class FlatContent {
public:
// Returns true if the string is flat and this structure contains content.
bool IsFlat() { return state_ != NON_FLAT; }
bool IsFlat() const { return state_ != NON_FLAT; }
// Returns true if the structure contains one-byte content.
bool IsOneByte() { return state_ == ONE_BYTE; }
bool IsOneByte() const { return state_ == ONE_BYTE; }
// Returns true if the structure contains two-byte content.
bool IsTwoByte() { return state_ == TWO_BYTE; }
bool IsTwoByte() const { return state_ == TWO_BYTE; }
// Return the one byte content of the string. Only use if IsOneByte()
// returns true.
Vector<const uint8_t> ToOneByteVector() {
Vector<const uint8_t> ToOneByteVector() const {
DCHECK_EQ(ONE_BYTE, state_);
return Vector<const uint8_t>(onebyte_start, length_);
}
// Return the two-byte content of the string. Only use if IsTwoByte()
// returns true.
Vector<const uc16> ToUC16Vector() {
Vector<const uc16> ToUC16Vector() const {
DCHECK_EQ(TWO_BYTE, state_);
return Vector<const uc16>(twobyte_start, length_);
}
uc16 Get(int i) {
uc16 Get(int i) const {
DCHECK(i < length_);
DCHECK(state_ != NON_FLAT);
if (state_ == ONE_BYTE) return onebyte_start[i];
......
This diff is collapsed.
......@@ -1077,7 +1077,7 @@ MUST_USE_RESULT static Object* ConvertCase(
RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
HandleScope scope(isolate);
DCHECK(args.length() == 1);
DCHECK_EQ(args.length(), 1);
CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
return ConvertCase(s, isolate, isolate->runtime_state()->to_lower_mapping());
}
......@@ -1085,7 +1085,7 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
RUNTIME_FUNCTION(Runtime_StringToUpperCase) {
HandleScope scope(isolate);
DCHECK(args.length() == 1);
DCHECK_EQ(args.length(), 1);
CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
return ConvertCase(s, isolate, isolate->runtime_state()->to_upper_mapping());
}
......
......@@ -262,7 +262,10 @@ namespace internal {
F(BreakIteratorFirst, 1, 1) \
F(BreakIteratorNext, 1, 1) \
F(BreakIteratorCurrent, 1, 1) \
F(BreakIteratorBreakType, 1, 1)
F(BreakIteratorBreakType, 1, 1) \
F(StringToLowerCaseI18N, 1, 1) \
F(StringToUpperCaseI18N, 1, 1) \
F(StringLocaleConvertCase, 3, 1)
#else
#define FOR_EACH_INTRINSIC_I18N(F)
#endif
......
......@@ -1988,17 +1988,6 @@
}, {
'toolsets': ['target'],
}],
['v8_enable_i18n_support==1', {
'variables': {
'i18n_library_files': [
'js/i18n.js',
],
},
}, {
'variables': {
'i18n_library_files': [],
},
}],
],
'variables': {
'library_files': [
......@@ -2048,6 +2037,12 @@
'libraries_experimental_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-experimental.bin',
'libraries_extras_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-extras.bin',
'libraries_experimental_extras_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-experimental-extras.bin',
'conditions': [
['v8_enable_i18n_support==1', {
'library_files': ['js/i18n.js'],
'experimental_library_files': ['js/icu-case-mapping.js'],
}],
],
},
'actions': [
{
......@@ -2055,7 +2050,6 @@
'inputs': [
'../tools/js2c.py',
'<@(library_files)',
'<@(i18n_library_files)'
],
'outputs': ['<(SHARED_INTERMEDIATE_DIR)/libraries.cc'],
'action': [
......@@ -2064,7 +2058,6 @@
'<(SHARED_INTERMEDIATE_DIR)/libraries.cc',
'CORE',
'<@(library_files)',
'<@(i18n_library_files)'
],
},
{
......@@ -2072,7 +2065,6 @@
'inputs': [
'../tools/js2c.py',
'<@(library_files)',
'<@(i18n_library_files)'
],
'outputs': ['<@(libraries_bin_file)'],
'action': [
......@@ -2081,7 +2073,6 @@
'<(SHARED_INTERMEDIATE_DIR)/libraries.cc',
'CORE',
'<@(library_files)',
'<@(i18n_library_files)',
'--startup_blob', '<@(libraries_bin_file)',
'--nojs',
],
......@@ -2098,7 +2089,7 @@
'../tools/js2c.py',
'<(SHARED_INTERMEDIATE_DIR)/experimental-libraries.cc',
'EXPERIMENTAL',
'<@(experimental_library_files)'
'<@(experimental_library_files)',
],
},
{
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --icu_case_mapping
// Some edge cases that unibrow got wrong
assertEquals("𐐘", "𐑀".toUpperCase());
assertEquals("𐑀", "𐐘".toLowerCase());
assertEquals("σ", "Σ".toLowerCase());
// Some different paths in the ICU case conversion fastpath
assertEquals("σς", "\u03A3\u03A3".toLowerCase());
// Expand sharp s in latin1 fastpath
assertEquals("ASSB", "A\u00DFB".toUpperCase());
assertEquals("AB", "Ab".toUpperCase());
// Find first upper case in fastpath
assertEquals("ab", "aB".toLowerCase());
assertEquals("AÜ", "aü".toUpperCase());
assertEquals("AÜ", "AÜ".toUpperCase());
assertEquals("aü", "aü".toLowerCase());
assertEquals("aü", "AÜ".toLowerCase());
assertEquals("aü", "AÜ".toLowerCase());
// Starts with fastpath, but switches to full Unicode path
// U+00FF is uppercased to U+0178.
assertEquals("AŸ", "aÿ".toUpperCase());
// U+00B5 (µ) is uppercased to U+039C (Μ)
assertEquals("AΜ", "aµ".toUpperCase());
// Buffer size increase
assertEquals("CSSBẶ", "cßbặ".toUpperCase());
assertEquals("FIFLFFIFFL", "\uFB01\uFB02\uFB03\uFB04".toUpperCase());
// OneByte input with buffer size increase: non-fast path
assertEquals("ABCSS", "abCß".toLocaleUpperCase("tr"));
// More comprehensive tests for "tr", "az" and "lt" are in
// test262/intl402/Strings/*
// Buffer size decrease with a single locale or locale list.
// In Turkic (tr, az), U+0307 preceeded by Capital Letter I is dropped.
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("tr"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("az"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase(["tr", "en"]));
// Cons string
assertEquals("abcijkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("tr"));
assertEquals("abcijkl",
("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("tr"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl",
("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLowerCase());
assertEquals("abci\u0307jkl",
("aB" + "cI" + "\u0307j" + "kl").toLowerCase());
// "tr" and "az" should behave identically.
assertEquals("aBcI\u0307".toLocaleLowerCase("tr"),
"aBcI\u0307".toLocaleLowerCase("az"));
// What matters is the first locale in the locale list.
assertEquals("aBcI\u0307".toLocaleLowerCase(["tr", "en", "fr"]),
"aBcI\u0307".toLocaleLowerCase("tr"));
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
"aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
"aBcI\u0307".toLowerCase());
// An empty locale list is the same as the default locale. Try these tests
// under Turkish and Greek locale.
assertEquals("aBcI\u0307".toLocaleLowerCase([]),
"aBcI\u0307".toLocaleLowerCase());
assertEquals("aBcI\u0307".toLocaleLowerCase([]),
"aBcI\u0307".toLocaleLowerCase(Intl.GetDefaultLocale));
assertEquals("άόύώ".toLocaleUpperCase([]), "άόύώ".toLocaleUpperCase());
assertEquals("άόύώ".toLocaleUpperCase([]),
"άόύώ".toLocaleUpperCase(Intl.GetDefaultLocale));
// English/root locale keeps U+0307 (combining dot above).
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["en", "tr"]));
assertEquals("abci\u0307", "aBcI\u0307".toLowerCase());
// Greek uppercasing: not covered by intl402/String/*, yet. Tonos (U+0301) and
// other diacritic marks are dropped. This rule is based on the current CLDR's
// el-Upper transformation, but Greek uppercasing rules are more sophisticated
// than this. See http://bugs.icu-project.org/trac/ticket/10582 and
// http://unicode.org/cldr/trac/ticket/7905 .
assertEquals("Α", \u0301".toLocaleUpperCase("el"));
assertEquals("Α", \u0301".toLocaleUpperCase("el-GR"));
assertEquals("Α", \u0301".toLocaleUpperCase("el-Grek"));
assertEquals("Α", \u0301".toLocaleUpperCase("el-Grek-GR"));
assertEquals("Α", "ά".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", \u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
assertEquals("ΟΕ", \u1f15".toLocaleUpperCase("el"));
assertEquals("ΟΕ", \u0301ε\u0314\u0301".toLocaleUpperCase("el"));
// Input and output are identical.
assertEquals("αβγδε", "αβγδε".toLocaleLowerCase("el"));
assertEquals("ΑΒΓΔΕ", "ΑΒΓΔΕ".toLocaleUpperCase("el"));
assertEquals("ΑΒΓΔΕАБ𝐀𝐁", "ΑΒΓΔΕАБ𝐀𝐁".toLocaleUpperCase("el"));
assertEquals("ABCDEÂÓḴ123", "ABCDEÂÓḴ123".toLocaleUpperCase("el"));
// ASCII-only or Latin-1 only: 1-byte
assertEquals("ABCDE123", "ABCDE123".toLocaleUpperCase("el"));
assertEquals("ABCDEÂÓ123", "ABCDEÂÓ123".toLocaleUpperCase("el"));
// To make sure that the input string is not overwritten in place.
var strings = ["abCdef", "αβγδε", "άόύώ", "аб"];
for (var s of strings) {
var backupAsArray = s.split("");
var uppered = s.toLocaleUpperCase("el");
assertEquals(s, backupAsArray.join(""));
}
// In other locales, U+0301 is preserved.
assertEquals(\u0301Ο\u0301Υ\u0301Ω\u0301",
\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("en"));
assertEquals(\u0301Ο\u0301Υ\u0301Ω\u0301",
\u0301ο\u0301υ\u0301ω\u0301".toUpperCase());
// Plane 1; Deseret and Warang Citi Script.
assertEquals("\u{10400}\u{118A0}", "\u{10428}\u{118C0}".toUpperCase());
assertEquals("\u{10428}\u{118C0}", "\u{10400}\u{118A0}".toLowerCase());
// Mathematical Bold {Capital, Small} Letter A do not change.
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toUpperCase());
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toLowerCase());
// Plane 1; New characters in Unicode 8.0
assertEquals("\u{10C80}", "\u{10CC0}".toUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLowerCase());
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());
......@@ -26,10 +26,12 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import re
from testrunner.local import testsuite
from testrunner.objects import testcase
FLAGS_PATTERN = re.compile(r"//\s+Flags:(.*)")
class IntlTestSuite(testsuite.TestSuite):
......@@ -55,7 +57,11 @@ class IntlTestSuite(testsuite.TestSuite):
return tests
def GetFlagsForTestCase(self, testcase, context):
source = self.GetSourceForTest(testcase)
flags = ["--allow-natives-syntax"] + context.mode_flags
flags_match = re.findall(FLAGS_PATTERN, source)
for match in flags_match:
flags += match.strip().split()
files = []
files.append(os.path.join(self.root, "assert.js"))
......@@ -71,6 +77,10 @@ class IntlTestSuite(testsuite.TestSuite):
return testcase.flags + flags
def GetSourceForTest(self, testcase):
filename = os.path.join(self.root, testcase.path + self.suffix())
with open(filename) as f:
return f.read()
def GetSuite(name, root):
return IntlTestSuite(name, root)
......@@ -139,14 +139,16 @@
'intl402/NumberFormat/11.1.1_1': [FAIL],
# https://code.google.com/p/v8/issues/detail?id=4476
'built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional': [FAIL],
'built-ins/String/prototype/toLocaleLowerCase/supplementary_plane': [FAIL],
# The bug is fixed but behind a flag, --icu_case_mapping.
'built-ins/String/prototype/toLowerCase/special_casing_conditional': [FAIL],
'built-ins/String/prototype/toLowerCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLocaleUpperCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toUpperCase/supplementary_plane': [FAIL],
# https://code.google.com/p/v8/issues/detail?id=4477
# The bug is fixed but behind a flag, --icu_case_mapping.
'built-ins/String/prototype/toLocaleUpperCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLocaleLowerCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Azeri': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Lithuanian': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Turkish': [FAIL],
......@@ -423,6 +425,22 @@
'built-ins/String/prototype/normalize/return-normalized-string': [SKIP],
'built-ins/String/prototype/normalize/return-normalized-string-from-coerced-form': [SKIP],
'built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter': [SKIP],
# Case-conversion is not fully compliant to the Unicode spec with i18n off.
'built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional': [FAIL],
'built-ins/String/prototype/toLocaleLowerCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLowerCase/special_casing_conditional': [FAIL],
'built-ins/String/prototype/toLowerCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLocaleUpperCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toUpperCase/supplementary_plane': [FAIL],
# Locale-sensitive case-conversion is not available with i18n off.
'intl402/String/prototype/toLocaleLowerCase/special_casing_Azeri': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Lithuanian': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Turkish': [FAIL],
'intl402/String/prototype/toLocaleUpperCase/special_casing_Azeri': [FAIL],
'intl402/String/prototype/toLocaleUpperCase/special_casing_Lithuanian': [FAIL],
'intl402/String/prototype/toLocaleUpperCase/special_casing_Turkish': [FAIL],
}], # no_i18n == True
['arch == arm or arch == mipsel or arch == mips or arch == arm64 or arch == mips64 or arch == mips64el', {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment