Commit b348d47b authored by jshin's avatar jshin Committed by Commit bot

Use ICU case conversion/transliterator for case conversion

When I18N is enabled, use ICU's case conversion API and transliteration
API [1] to implement String.prototype.to{Upper,Lower}Case and
String.prototype.toLocale{Upper,Lower}Case.

* ICU-based case conversion was implemented in runtime-i18n.cc/i18n.js
* The above 4 functions are overridden with those in i18n.js when the
  --icu_case_mapping flag is turned on. So that the flag controls the override,
  the override itself is performed in icu-case-mapping.js.

Previously, toLocale{U,L}Case just called to{U,L}Case so that they didn't
support locale-sensitive case conversion for Turkic languages (az, tr),
Greek (el) and Lithuanian (lt).

Before ICU APIs for the most general case are called, a fast path for Latin-1
is tried. It's taken from Blink and adapted as necessary. This fast path
is always tried for to{U,L}Case. For toLocale{U,L}Case, it's only taken
when the locale (explicitly specified or default) is not in {az, el, lt, tr}.

With these changes, a build with --icu_case_mapping=true passes a bunch
of tests in test262/intl402/Strings/* and intl/* that failed before.

Handling of pure ASCII strings (aligned at a word boundary) is not as fast
as Unibrow's implementation, which uses word-by-word case conversion. OTOH,
Latin-1 input handling is faster than Unibrow. General Unicode input
handling is slower but more accurate.

See https://docs.google.com/spreadsheets/d/1KJCJxKc1FxFXjwmYqABS0_2cNdPetvnd8gY8_HGSbrg/edit?usp=sharing for the benchmark.

This CL started with http://crrev.com/1544023002#ps200001 by littledan@,
but has changed significantly since.

[1] See why transliteration API is needed for uppercasing in Greek.
    http://bugs.icu-project.org/trac/ticket/10582

R=yangguo
BUG=v8:4476,v8:4477
LOG=Y
TEST=test262/{built-ins,intl402}/Strings/*, webkit/fast/js/*, mjsunit/string-case,
     intl/general/case*

Review-Url: https://codereview.chromium.org/1812673005
Cr-Commit-Position: refs/heads/master@{#36187}
parent adcc5119
...@@ -428,6 +428,10 @@ action("js2c_experimental") { ...@@ -428,6 +428,10 @@ action("js2c_experimental") {
"$target_gen_dir/experimental-libraries.cc", "$target_gen_dir/experimental-libraries.cc",
] ]
if (v8_enable_i18n_support) {
sources += [ "src/js/icu-case-mapping.js" ]
}
args = [ args = [
rebase_path("$target_gen_dir/experimental-libraries.cc", rebase_path("$target_gen_dir/experimental-libraries.cc",
root_build_dir), root_build_dir),
......
...@@ -2478,6 +2478,9 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_instanceof) ...@@ -2478,6 +2478,9 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_instanceof)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_restrictive_declarations) EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_restrictive_declarations)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_exponentiation_operator) EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_exponentiation_operator)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_string_padding) EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_string_padding)
#ifdef V8_I18N_SUPPORT
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(icu_case_mapping)
#endif
void InstallPublicSymbol(Factory* factory, Handle<Context> native_context, void InstallPublicSymbol(Factory* factory, Handle<Context> native_context,
const char* name, Handle<Symbol> value) { const char* name, Handle<Symbol> value) {
...@@ -3046,6 +3049,10 @@ bool Genesis::InstallExperimentalNatives() { ...@@ -3046,6 +3049,10 @@ bool Genesis::InstallExperimentalNatives() {
static const char* harmony_exponentiation_operator_natives[] = {nullptr}; static const char* harmony_exponentiation_operator_natives[] = {nullptr};
static const char* harmony_string_padding_natives[] = { static const char* harmony_string_padding_natives[] = {
"native harmony-string-padding.js", nullptr}; "native harmony-string-padding.js", nullptr};
#ifdef V8_I18N_SUPPORT
static const char* icu_case_mapping_natives[] = {"native icu-case-mapping.js",
nullptr};
#endif
for (int i = ExperimentalNatives::GetDebuggerCount(); for (int i = ExperimentalNatives::GetDebuggerCount();
i < ExperimentalNatives::GetBuiltinsCount(); i++) { i < ExperimentalNatives::GetBuiltinsCount(); i++) {
......
...@@ -193,12 +193,22 @@ DEFINE_IMPLICATION(es_staging, harmony_regexp_lookbehind) ...@@ -193,12 +193,22 @@ DEFINE_IMPLICATION(es_staging, harmony_regexp_lookbehind)
DEFINE_IMPLICATION(es_staging, move_object_start) DEFINE_IMPLICATION(es_staging, move_object_start)
// Features that are still work in progress (behind individual flags). // Features that are still work in progress (behind individual flags).
#ifdef V8_I18N_SUPPORT
#define HARMONY_INPROGRESS(V) \
V(harmony_function_sent, "harmony function.sent") \
V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \
V(harmony_simd, "harmony simd") \
V(harmony_do_expressions, "harmony do-expressions") \
V(harmony_regexp_property, "harmony unicode regexp property classes") \
V(icu_case_mapping, "case mapping with ICU rather than Unibrow")
#else
#define HARMONY_INPROGRESS(V) \ #define HARMONY_INPROGRESS(V) \
V(harmony_function_sent, "harmony function.sent") \ V(harmony_function_sent, "harmony function.sent") \
V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \ V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \
V(harmony_simd, "harmony simd") \ V(harmony_simd, "harmony simd") \
V(harmony_do_expressions, "harmony do-expressions") \ V(harmony_do_expressions, "harmony do-expressions") \
V(harmony_regexp_property, "harmony unicode regexp property classes") V(harmony_regexp_property, "harmony unicode regexp property classes")
#endif
// Features that are complete (but still behind --harmony/es-staging flag). // Features that are complete (but still behind --harmony/es-staging flag).
#define HARMONY_STAGED(V) \ #define HARMONY_STAGED(V) \
......
...@@ -142,6 +142,13 @@ var AVAILABLE_LOCALES = { ...@@ -142,6 +142,13 @@ var AVAILABLE_LOCALES = {
*/ */
var DEFAULT_ICU_LOCALE = UNDEFINED; var DEFAULT_ICU_LOCALE = UNDEFINED;
function GetDefaultICULocaleJS() {
if (IS_UNDEFINED(DEFAULT_ICU_LOCALE)) {
DEFAULT_ICU_LOCALE = %GetDefaultICULocale();
}
return DEFAULT_ICU_LOCALE;
}
/** /**
* Unicode extension regular expression. * Unicode extension regular expression.
*/ */
...@@ -446,11 +453,7 @@ function lookupMatcher(service, requestedLocales) { ...@@ -446,11 +453,7 @@ function lookupMatcher(service, requestedLocales) {
} }
// Didn't find a match, return default. // Didn't find a match, return default.
if (IS_UNDEFINED(DEFAULT_ICU_LOCALE)) { return {'locale': GetDefaultICULocaleJS(), 'extension': '', 'position': -1};
DEFAULT_ICU_LOCALE = %GetDefaultICULocale();
}
return {'locale': DEFAULT_ICU_LOCALE, 'extension': '', 'position': -1};
} }
...@@ -722,21 +725,24 @@ function toTitleCaseTimezoneLocation(location) { ...@@ -722,21 +725,24 @@ function toTitleCaseTimezoneLocation(location) {
*/ */
function canonicalizeLanguageTag(localeID) { function canonicalizeLanguageTag(localeID) {
// null is typeof 'object' so we have to do extra check. // null is typeof 'object' so we have to do extra check.
if (typeof localeID !== 'string' && typeof localeID !== 'object' || if ((!IS_STRING(localeID) && !IS_RECEIVER(localeID)) ||
IS_NULL(localeID)) { IS_NULL(localeID)) {
throw MakeTypeError(kLanguageID); throw MakeTypeError(kLanguageID);
} }
// Optimize for the most common case; a language code alone in
// the canonical form/lowercase (e.g. "en", "fil").
if (IS_STRING(localeID) &&
!IS_NULL(InternalRegExpMatch(/^[a-z]{2,3}$/, localeID))) {
return localeID;
}
var localeString = GlobalString(localeID); var localeString = GlobalString(localeID);
if (isValidLanguageTag(localeString) === false) { if (isValidLanguageTag(localeString) === false) {
throw MakeRangeError(kInvalidLanguageTag, localeString); throw MakeRangeError(kInvalidLanguageTag, localeString);
} }
// This call will strip -kn but not -kn-true extensions.
// ICU bug filled - http://bugs.icu-project.org/trac/ticket/9265.
// TODO(cira): check if -u-kn-true-kc-true-kh-true still throws after
// upgrade to ICU 4.9.
var tag = %CanonicalizeLanguageTag(localeString); var tag = %CanonicalizeLanguageTag(localeString);
if (tag === 'invalid-tag') { if (tag === 'invalid-tag') {
throw MakeRangeError(kInvalidLanguageTag, localeString); throw MakeRangeError(kInvalidLanguageTag, localeString);
...@@ -1989,6 +1995,37 @@ function cachedOrNewService(service, locales, options, defaults) { ...@@ -1989,6 +1995,37 @@ function cachedOrNewService(service, locales, options, defaults) {
return new savedObjects[service](locales, useOptions); return new savedObjects[service](locales, useOptions);
} }
function LocaleConvertCase(s, locales, isToUpper) {
// ECMA 402 section 13.1.2 steps 1 through 12.
var language;
// Optimize for the most common two cases. initializeLocaleList() can handle
// them as well, but it's rather slow accounting for over 60% of
// toLocale{U,L}Case() and about 40% of toLocale{U,L}Case("<locale>").
if (IS_UNDEFINED(locales)) {
language = GetDefaultICULocaleJS();
} else if (IS_STRING(locales)) {
language = canonicalizeLanguageTag(locales);
} else {
var locales = initializeLocaleList(locales);
language = locales.length > 0 ? locales[0] : GetDefaultICULocaleJS();
}
// StringSplit is slower than this.
var pos = %_Call(StringIndexOf, language, '-');
if (pos != -1) {
language = %_Call(StringSubstring, language, 0, pos);
}
var CUSTOM_CASE_LANGUAGES = ['az', 'el', 'lt', 'tr'];
var langIndex = %_Call(ArrayIndexOf, CUSTOM_CASE_LANGUAGES, language);
if (langIndex == -1) {
// language-independent case conversion.
return isToUpper ? %StringToUpperCaseI18N(s) : %StringToLowerCaseI18N(s);
}
return %StringLocaleConvertCase(s, isToUpper,
CUSTOM_CASE_LANGUAGES[langIndex]);
}
/** /**
* Compares this and that, and returns less than 0, 0 or greater than 0 value. * Compares this and that, and returns less than 0, 0 or greater than 0 value.
* Overrides the built-in method. * Overrides the built-in method.
...@@ -2041,6 +2078,56 @@ OverrideFunction(GlobalString.prototype, 'normalize', function() { ...@@ -2041,6 +2078,56 @@ OverrideFunction(GlobalString.prototype, 'normalize', function() {
} }
); );
function ToLowerCaseI18N() {
if (!IS_UNDEFINED(new.target)) {
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLowerCase");
var s = TO_STRING(this);
return %StringToLowerCaseI18N(s);
}
function ToUpperCaseI18N() {
if (!IS_UNDEFINED(new.target)) {
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toUpperCase");
var s = TO_STRING(this);
return %StringToUpperCaseI18N(s);
}
function ToLocaleLowerCaseI18N(locales) {
if (!IS_UNDEFINED(new.target)) {
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleLowerCase");
return LocaleConvertCase(TO_STRING(this), locales, false);
}
%FunctionSetLength(ToLocaleLowerCaseI18N, 0);
function ToLocaleUpperCaseI18N(locales) {
if (!IS_UNDEFINED(new.target)) {
throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
}
CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleUpperCase");
return LocaleConvertCase(TO_STRING(this), locales, true);
}
%FunctionSetLength(ToLocaleUpperCaseI18N, 0);
%FunctionRemovePrototype(ToLowerCaseI18N);
%FunctionRemovePrototype(ToUpperCaseI18N);
%FunctionRemovePrototype(ToLocaleLowerCaseI18N);
%FunctionRemovePrototype(ToLocaleUpperCaseI18N);
utils.Export(function(to) {
to.ToLowerCaseI18N = ToLowerCaseI18N;
to.ToUpperCaseI18N = ToUpperCaseI18N;
to.ToLocaleLowerCaseI18N = ToLocaleLowerCaseI18N;
to.ToLocaleUpperCaseI18N = ToLocaleUpperCaseI18N;
});
/** /**
* Formats a Number object (this) using locale and options values. * Formats a Number object (this) using locale and options values.
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
(function(global, utils) {
"use strict";
%CheckIsBootstrapping();
var GlobalString = global.String;
var OverrideFunction = utils.OverrideFunction;
var ToLowerCaseI18N = utils.ImportNow("ToLowerCaseI18N");
var ToUpperCaseI18N = utils.ImportNow("ToUpperCaseI18N");
var ToLocaleLowerCaseI18N = utils.ImportNow("ToLocaleLowerCaseI18N");
var ToLocaleUpperCaseI18N = utils.ImportNow("ToLocaleUpperCaseI18N");
OverrideFunction(GlobalString.prototype, 'toLowerCase', ToLowerCaseI18N, true);
OverrideFunction(GlobalString.prototype, 'toUpperCase', ToUpperCaseI18N, true);
OverrideFunction(GlobalString.prototype, 'toLocaleLowerCase',
ToLocaleLowerCaseI18N, true);
OverrideFunction(GlobalString.prototype, 'toLocaleUpperCase',
ToLocaleUpperCaseI18N, true);
})
...@@ -208,7 +208,11 @@ function PostNatives(utils) { ...@@ -208,7 +208,11 @@ function PostNatives(utils) {
"SetIteratorNext", "SetIteratorNext",
"SetValues", "SetValues",
"SymbolToString", "SymbolToString",
"ToLocaleLowerCaseI18N",
"ToLocaleUpperCaseI18N",
"ToLowerCaseI18N",
"ToPositiveInteger", "ToPositiveInteger",
"ToUpperCaseI18N",
// From runtime: // From runtime:
"is_concat_spreadable_symbol", "is_concat_spreadable_symbol",
"iterator_symbol", "iterator_symbol",
......
...@@ -8645,26 +8645,26 @@ class String: public Name { ...@@ -8645,26 +8645,26 @@ class String: public Name {
class FlatContent { class FlatContent {
public: public:
// Returns true if the string is flat and this structure contains content. // Returns true if the string is flat and this structure contains content.
bool IsFlat() { return state_ != NON_FLAT; } bool IsFlat() const { return state_ != NON_FLAT; }
// Returns true if the structure contains one-byte content. // Returns true if the structure contains one-byte content.
bool IsOneByte() { return state_ == ONE_BYTE; } bool IsOneByte() const { return state_ == ONE_BYTE; }
// Returns true if the structure contains two-byte content. // Returns true if the structure contains two-byte content.
bool IsTwoByte() { return state_ == TWO_BYTE; } bool IsTwoByte() const { return state_ == TWO_BYTE; }
// Return the one byte content of the string. Only use if IsOneByte() // Return the one byte content of the string. Only use if IsOneByte()
// returns true. // returns true.
Vector<const uint8_t> ToOneByteVector() { Vector<const uint8_t> ToOneByteVector() const {
DCHECK_EQ(ONE_BYTE, state_); DCHECK_EQ(ONE_BYTE, state_);
return Vector<const uint8_t>(onebyte_start, length_); return Vector<const uint8_t>(onebyte_start, length_);
} }
// Return the two-byte content of the string. Only use if IsTwoByte() // Return the two-byte content of the string. Only use if IsTwoByte()
// returns true. // returns true.
Vector<const uc16> ToUC16Vector() { Vector<const uc16> ToUC16Vector() const {
DCHECK_EQ(TWO_BYTE, state_); DCHECK_EQ(TWO_BYTE, state_);
return Vector<const uc16>(twobyte_start, length_); return Vector<const uc16>(twobyte_start, length_);
} }
uc16 Get(int i) { uc16 Get(int i) const {
DCHECK(i < length_); DCHECK(i < length_);
DCHECK(state_ != NON_FLAT); DCHECK(state_ != NON_FLAT);
if (state_ == ONE_BYTE) return onebyte_start[i]; if (state_ == ONE_BYTE) return onebyte_start[i];
......
This diff is collapsed.
...@@ -1077,7 +1077,7 @@ MUST_USE_RESULT static Object* ConvertCase( ...@@ -1077,7 +1077,7 @@ MUST_USE_RESULT static Object* ConvertCase(
RUNTIME_FUNCTION(Runtime_StringToLowerCase) { RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
HandleScope scope(isolate); HandleScope scope(isolate);
DCHECK(args.length() == 1); DCHECK_EQ(args.length(), 1);
CONVERT_ARG_HANDLE_CHECKED(String, s, 0); CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
return ConvertCase(s, isolate, isolate->runtime_state()->to_lower_mapping()); return ConvertCase(s, isolate, isolate->runtime_state()->to_lower_mapping());
} }
...@@ -1085,7 +1085,7 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCase) { ...@@ -1085,7 +1085,7 @@ RUNTIME_FUNCTION(Runtime_StringToLowerCase) {
RUNTIME_FUNCTION(Runtime_StringToUpperCase) { RUNTIME_FUNCTION(Runtime_StringToUpperCase) {
HandleScope scope(isolate); HandleScope scope(isolate);
DCHECK(args.length() == 1); DCHECK_EQ(args.length(), 1);
CONVERT_ARG_HANDLE_CHECKED(String, s, 0); CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
return ConvertCase(s, isolate, isolate->runtime_state()->to_upper_mapping()); return ConvertCase(s, isolate, isolate->runtime_state()->to_upper_mapping());
} }
......
...@@ -262,7 +262,10 @@ namespace internal { ...@@ -262,7 +262,10 @@ namespace internal {
F(BreakIteratorFirst, 1, 1) \ F(BreakIteratorFirst, 1, 1) \
F(BreakIteratorNext, 1, 1) \ F(BreakIteratorNext, 1, 1) \
F(BreakIteratorCurrent, 1, 1) \ F(BreakIteratorCurrent, 1, 1) \
F(BreakIteratorBreakType, 1, 1) F(BreakIteratorBreakType, 1, 1) \
F(StringToLowerCaseI18N, 1, 1) \
F(StringToUpperCaseI18N, 1, 1) \
F(StringLocaleConvertCase, 3, 1)
#else #else
#define FOR_EACH_INTRINSIC_I18N(F) #define FOR_EACH_INTRINSIC_I18N(F)
#endif #endif
......
...@@ -1988,17 +1988,6 @@ ...@@ -1988,17 +1988,6 @@
}, { }, {
'toolsets': ['target'], 'toolsets': ['target'],
}], }],
['v8_enable_i18n_support==1', {
'variables': {
'i18n_library_files': [
'js/i18n.js',
],
},
}, {
'variables': {
'i18n_library_files': [],
},
}],
], ],
'variables': { 'variables': {
'library_files': [ 'library_files': [
...@@ -2048,6 +2037,12 @@ ...@@ -2048,6 +2037,12 @@
'libraries_experimental_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-experimental.bin', 'libraries_experimental_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-experimental.bin',
'libraries_extras_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-extras.bin', 'libraries_extras_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-extras.bin',
'libraries_experimental_extras_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-experimental-extras.bin', 'libraries_experimental_extras_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-experimental-extras.bin',
'conditions': [
['v8_enable_i18n_support==1', {
'library_files': ['js/i18n.js'],
'experimental_library_files': ['js/icu-case-mapping.js'],
}],
],
}, },
'actions': [ 'actions': [
{ {
...@@ -2055,7 +2050,6 @@ ...@@ -2055,7 +2050,6 @@
'inputs': [ 'inputs': [
'../tools/js2c.py', '../tools/js2c.py',
'<@(library_files)', '<@(library_files)',
'<@(i18n_library_files)'
], ],
'outputs': ['<(SHARED_INTERMEDIATE_DIR)/libraries.cc'], 'outputs': ['<(SHARED_INTERMEDIATE_DIR)/libraries.cc'],
'action': [ 'action': [
...@@ -2064,7 +2058,6 @@ ...@@ -2064,7 +2058,6 @@
'<(SHARED_INTERMEDIATE_DIR)/libraries.cc', '<(SHARED_INTERMEDIATE_DIR)/libraries.cc',
'CORE', 'CORE',
'<@(library_files)', '<@(library_files)',
'<@(i18n_library_files)'
], ],
}, },
{ {
...@@ -2072,7 +2065,6 @@ ...@@ -2072,7 +2065,6 @@
'inputs': [ 'inputs': [
'../tools/js2c.py', '../tools/js2c.py',
'<@(library_files)', '<@(library_files)',
'<@(i18n_library_files)'
], ],
'outputs': ['<@(libraries_bin_file)'], 'outputs': ['<@(libraries_bin_file)'],
'action': [ 'action': [
...@@ -2081,7 +2073,6 @@ ...@@ -2081,7 +2073,6 @@
'<(SHARED_INTERMEDIATE_DIR)/libraries.cc', '<(SHARED_INTERMEDIATE_DIR)/libraries.cc',
'CORE', 'CORE',
'<@(library_files)', '<@(library_files)',
'<@(i18n_library_files)',
'--startup_blob', '<@(libraries_bin_file)', '--startup_blob', '<@(libraries_bin_file)',
'--nojs', '--nojs',
], ],
...@@ -2098,7 +2089,7 @@ ...@@ -2098,7 +2089,7 @@
'../tools/js2c.py', '../tools/js2c.py',
'<(SHARED_INTERMEDIATE_DIR)/experimental-libraries.cc', '<(SHARED_INTERMEDIATE_DIR)/experimental-libraries.cc',
'EXPERIMENTAL', 'EXPERIMENTAL',
'<@(experimental_library_files)' '<@(experimental_library_files)',
], ],
}, },
{ {
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --icu_case_mapping
// Some edge cases that unibrow got wrong
assertEquals("𐐘", "𐑀".toUpperCase());
assertEquals("𐑀", "𐐘".toLowerCase());
assertEquals("σ", "Σ".toLowerCase());
// Some different paths in the ICU case conversion fastpath
assertEquals("σς", "\u03A3\u03A3".toLowerCase());
// Expand sharp s in latin1 fastpath
assertEquals("ASSB", "A\u00DFB".toUpperCase());
assertEquals("AB", "Ab".toUpperCase());
// Find first upper case in fastpath
assertEquals("ab", "aB".toLowerCase());
assertEquals("AÜ", "aü".toUpperCase());
assertEquals("AÜ", "AÜ".toUpperCase());
assertEquals("aü", "aü".toLowerCase());
assertEquals("aü", "AÜ".toLowerCase());
assertEquals("aü", "AÜ".toLowerCase());
// Starts with fastpath, but switches to full Unicode path
// U+00FF is uppercased to U+0178.
assertEquals("AŸ", "aÿ".toUpperCase());
// U+00B5 (µ) is uppercased to U+039C (Μ)
assertEquals("AΜ", "aµ".toUpperCase());
// Buffer size increase
assertEquals("CSSBẶ", "cßbặ".toUpperCase());
assertEquals("FIFLFFIFFL", "\uFB01\uFB02\uFB03\uFB04".toUpperCase());
// OneByte input with buffer size increase: non-fast path
assertEquals("ABCSS", "abCß".toLocaleUpperCase("tr"));
// More comprehensive tests for "tr", "az" and "lt" are in
// test262/intl402/Strings/*
// Buffer size decrease with a single locale or locale list.
// In Turkic (tr, az), U+0307 preceded by Capital Letter I is dropped.
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("tr"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("az"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase(["tr", "en"]));
// Cons string
assertEquals("abcijkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("tr"));
assertEquals("abcijkl",
("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("tr"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl",
("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLowerCase());
assertEquals("abci\u0307jkl",
("aB" + "cI" + "\u0307j" + "kl").toLowerCase());
// "tr" and "az" should behave identically.
assertEquals("aBcI\u0307".toLocaleLowerCase("tr"),
"aBcI\u0307".toLocaleLowerCase("az"));
// What matters is the first locale in the locale list.
assertEquals("aBcI\u0307".toLocaleLowerCase(["tr", "en", "fr"]),
"aBcI\u0307".toLocaleLowerCase("tr"));
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
"aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
"aBcI\u0307".toLowerCase());
// An empty locale list is the same as the default locale. Try these tests
// under Turkish and Greek locale.
assertEquals("aBcI\u0307".toLocaleLowerCase([]),
"aBcI\u0307".toLocaleLowerCase());
assertEquals("aBcI\u0307".toLocaleLowerCase([]),
"aBcI\u0307".toLocaleLowerCase(Intl.GetDefaultLocale));
assertEquals("άόύώ".toLocaleUpperCase([]), "άόύώ".toLocaleUpperCase());
assertEquals("άόύώ".toLocaleUpperCase([]),
"άόύώ".toLocaleUpperCase(Intl.GetDefaultLocale));
// English/root locale keeps U+0307 (combining dot above).
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["en", "tr"]));
assertEquals("abci\u0307", "aBcI\u0307".toLowerCase());
// Greek uppercasing: not covered by intl402/String/*, yet. Tonos (U+0301) and
// other diacritic marks are dropped. This rule is based on the current CLDR's
// el-Upper transformation, but Greek uppercasing rules are more sophisticated
// than this. See http://bugs.icu-project.org/trac/ticket/10582 and
// http://unicode.org/cldr/trac/ticket/7905 .
// Decomposed input: small alpha followed by U+0301. The combining mark is
// dropped for every spelling of the Greek locale tag.
assertEquals("Α", "α\u0301".toLocaleUpperCase("el"));
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-GR"));
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek"));
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek-GR"));
assertEquals("Α", "ά".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
// Same vowels as above, each written as base letter + U+0301.
assertEquals("ΑΟΥΩ", "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
// Omicron with tonos followed by epsilon with dasia and oxia, first
// precomposed and then fully decomposed; all marks are dropped.
// NOTE(review): the leading letter of both inputs was reconstructed from the
// expected output "ΟΕ" (capital omicron assumed) - confirm against upstream.
assertEquals("ΟΕ", "Ό\u1f15".toLocaleUpperCase("el"));
assertEquals("ΟΕ", "Ο\u0301ε\u0314\u0301".toLocaleUpperCase("el"));
// Input and output are identical.
assertEquals("αβγδε", "αβγδε".toLocaleLowerCase("el"));
assertEquals("ΑΒΓΔΕ", "ΑΒΓΔΕ".toLocaleUpperCase("el"));
assertEquals("ΑΒΓΔΕАБ𝐀𝐁", "ΑΒΓΔΕАБ𝐀𝐁".toLocaleUpperCase("el"));
assertEquals("ABCDEÂÓḴ123", "ABCDEÂÓḴ123".toLocaleUpperCase("el"));
// ASCII-only or Latin-1 only: 1-byte
assertEquals("ABCDE123", "ABCDE123".toLocaleUpperCase("el"));
assertEquals("ABCDEÂÓ123", "ABCDEÂÓ123".toLocaleUpperCase("el"));
// To make sure that the input string is not overwritten in place.
var strings = ["abCdef", "αβγδε", "άόύώ", "аб"];
for (var s of strings) {
var backupAsArray = s.split("");
var uppered = s.toLocaleUpperCase("el");
assertEquals(s, backupAsArray.join(""));
}
// In other locales, U+0301 is preserved: only the base letters are
// uppercased and every combining mark stays in place.
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
             "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("en"));
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
             "α\u0301ο\u0301υ\u0301ω\u0301".toUpperCase());
// Plane 1; Deseret and Warang Citi Script.
assertEquals("\u{10400}\u{118A0}", "\u{10428}\u{118C0}".toUpperCase());
assertEquals("\u{10428}\u{118C0}", "\u{10400}\u{118A0}".toLowerCase());
// Mathematical Bold {Capital, Small} Letter A do not change.
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toUpperCase());
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toLowerCase());
// Plane 1; New characters in Unicode 8.0
// Locale-independent conversion, in both directions.
assertEquals("\u{10C80}", "\u{10CC0}".toUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLowerCase());
// Default locale, in both directions.
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());
// A locale with custom case rules must leave this pair unaffected, again in
// both directions.
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase(["tr"]));
...@@ -26,10 +26,12 @@ ...@@ -26,10 +26,12 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os import os
import re
from testrunner.local import testsuite from testrunner.local import testsuite
from testrunner.objects import testcase from testrunner.objects import testcase
FLAGS_PATTERN = re.compile(r"//\s+Flags:(.*)")
class IntlTestSuite(testsuite.TestSuite): class IntlTestSuite(testsuite.TestSuite):
...@@ -55,7 +57,11 @@ class IntlTestSuite(testsuite.TestSuite): ...@@ -55,7 +57,11 @@ class IntlTestSuite(testsuite.TestSuite):
return tests return tests
def GetFlagsForTestCase(self, testcase, context): def GetFlagsForTestCase(self, testcase, context):
source = self.GetSourceForTest(testcase)
flags = ["--allow-natives-syntax"] + context.mode_flags flags = ["--allow-natives-syntax"] + context.mode_flags
flags_match = re.findall(FLAGS_PATTERN, source)
for match in flags_match:
flags += match.strip().split()
files = [] files = []
files.append(os.path.join(self.root, "assert.js")) files.append(os.path.join(self.root, "assert.js"))
...@@ -71,6 +77,10 @@ class IntlTestSuite(testsuite.TestSuite): ...@@ -71,6 +77,10 @@ class IntlTestSuite(testsuite.TestSuite):
return testcase.flags + flags return testcase.flags + flags
def GetSourceForTest(self, testcase):
  """Returns the full text of the source file backing `testcase`.

  The file name is built from self.root, testcase.path and self.suffix().
  """
  filename = os.path.join(self.root, testcase.path + self.suffix())
  with open(filename) as f:
    return f.read()
def GetSuite(name, root): def GetSuite(name, root):
return IntlTestSuite(name, root) return IntlTestSuite(name, root)
...@@ -139,14 +139,16 @@ ...@@ -139,14 +139,16 @@
'intl402/NumberFormat/11.1.1_1': [FAIL], 'intl402/NumberFormat/11.1.1_1': [FAIL],
# https://code.google.com/p/v8/issues/detail?id=4476 # https://code.google.com/p/v8/issues/detail?id=4476
'built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional': [FAIL], # The bug is fixed but behind a flag, --icu_case_mapping.
'built-ins/String/prototype/toLocaleLowerCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLowerCase/special_casing_conditional': [FAIL], 'built-ins/String/prototype/toLowerCase/special_casing_conditional': [FAIL],
'built-ins/String/prototype/toLowerCase/supplementary_plane': [FAIL], 'built-ins/String/prototype/toLowerCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLocaleUpperCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toUpperCase/supplementary_plane': [FAIL], 'built-ins/String/prototype/toUpperCase/supplementary_plane': [FAIL],
# https://code.google.com/p/v8/issues/detail?id=4477 # https://code.google.com/p/v8/issues/detail?id=4477
# The bug is fixed but behind a flag, --icu_case_mapping.
'built-ins/String/prototype/toLocaleUpperCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLocaleLowerCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Azeri': [FAIL], 'intl402/String/prototype/toLocaleLowerCase/special_casing_Azeri': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Lithuanian': [FAIL], 'intl402/String/prototype/toLocaleLowerCase/special_casing_Lithuanian': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Turkish': [FAIL], 'intl402/String/prototype/toLocaleLowerCase/special_casing_Turkish': [FAIL],
...@@ -423,6 +425,22 @@ ...@@ -423,6 +425,22 @@
'built-ins/String/prototype/normalize/return-normalized-string': [SKIP], 'built-ins/String/prototype/normalize/return-normalized-string': [SKIP],
'built-ins/String/prototype/normalize/return-normalized-string-from-coerced-form': [SKIP], 'built-ins/String/prototype/normalize/return-normalized-string-from-coerced-form': [SKIP],
'built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter': [SKIP], 'built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter': [SKIP],
# Case-conversion is not fully compliant to the Unicode spec with i18n off.
'built-ins/String/prototype/toLocaleLowerCase/special_casing_conditional': [FAIL],
'built-ins/String/prototype/toLocaleLowerCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLowerCase/special_casing_conditional': [FAIL],
'built-ins/String/prototype/toLowerCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toLocaleUpperCase/supplementary_plane': [FAIL],
'built-ins/String/prototype/toUpperCase/supplementary_plane': [FAIL],
# Locale-sensitive case-conversion is not available with i18n off.
'intl402/String/prototype/toLocaleLowerCase/special_casing_Azeri': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Lithuanian': [FAIL],
'intl402/String/prototype/toLocaleLowerCase/special_casing_Turkish': [FAIL],
'intl402/String/prototype/toLocaleUpperCase/special_casing_Azeri': [FAIL],
'intl402/String/prototype/toLocaleUpperCase/special_casing_Lithuanian': [FAIL],
'intl402/String/prototype/toLocaleUpperCase/special_casing_Turkish': [FAIL],
}], # no_i18n == True }], # no_i18n == True
['arch == arm or arch == mipsel or arch == mips or arch == arm64 or arch == mips64 or arch == mips64el', { ['arch == arm or arch == mipsel or arch == mips or arch == arm64 or arch == mips64 or arch == mips64el', {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment