Commit 4f224b39, authored by jshin and committed by Commit bot

Use a regular ICU API for el-Upper

ICU now supports uppercasing in Greek via its regular uppercasing API.
So, there's no need to use a slow transliteration API for uppercasing
in Greek.

This CL includes rolling ICU to ICU 58.1.

Besides, drop intl402/Intl/getCanonicalLocales/weird-cases from
test262.status because it passes now with ICU 58.1.

BUG=chromium:637001,v8:5012

Review-Url: https://codereview.chromium.org/2491333003
Cr-Commit-Position: refs/heads/master@{#41009}
parent 9b5bdfea
......@@ -12,7 +12,7 @@ deps = {
"v8/tools/gyp":
Var("chromium_url") + "/external/gyp.git" + "@" + "e7079f0e0e14108ab0dba58728ff219637458563",
"v8/third_party/icu":
Var("chromium_url") + "/chromium/deps/icu.git" + "@" + "b0bd3ee50bc2e768d7a17cbc60d87f517f024dbe",
Var("chromium_url") + "/chromium/deps/icu.git" + "@" + "c1a237113f525a1561d4b322d7653e1083f79aaa",
"v8/third_party/instrumented_libraries":
Var("chromium_url") + "/chromium/src/third_party/instrumented_libraries.git" + "@" + "45f5814b1543e41ea0be54c771e3840ea52cca4a",
"v8/buildtools":
......
......@@ -939,55 +939,10 @@ RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
}
namespace {
// Runs the ICU transliterator identified by |transliterator_id| over |input|,
// modifying |input| in place.
//
// |input|             string to transform; overwritten with the result.
// |transliterator_id| NUL-terminated ICU transliterator ID (the only caller
//                     visible here passes "el-Upper").
//
// If the transliterator cannot be created, the function returns silently and
// |input| is left as it was; the error status is not surfaced to the caller.
void ConvertCaseWithTransliterator(icu::UnicodeString* input,
const char* transliterator_id) {
UErrorCode status = U_ZERO_ERROR;
// A fresh transliterator is created on every call and released by the
// unique_ptr when the function returns.
std::unique_ptr<icu::Transliterator> translit(
icu::Transliterator::createInstance(
// Length -1 with US_INV: NOTE(review): presumably "NUL-terminated,
// invariant-character string" per the ICU UnicodeString constructor --
// confirm against the ICU API docs.
icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD,
status));
// Bail out before dereferencing |translit| when creation failed.
if (U_FAILURE(status)) return;
translit->transliterate(*input);
}
MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
bool is_to_upper, const char* lang) {
int32_t src_length = s->length();
// Greek uppercasing has to be done via transliteration.
// TODO(jshin): Drop this special-casing once ICU's regular case conversion
// API supports Greek uppercasing. See
// http://bugs.icu-project.org/trac/ticket/10582 .
// In the meantime, if there's no Greek character in |s|, call this
// function again with the root locale (lang="").
// ICU's C API for transliteration is nasty and we just use C++ API.
if (V8_UNLIKELY(is_to_upper && lang[0] == 'e' && lang[1] == 'l')) {
icu::UnicodeString converted;
std::unique_ptr<uc16[]> sap;
{
DisallowHeapAllocation no_gc;
String::FlatContent flat = s->GetFlatContent();
const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
// Starts with the source string (read-only alias with copy-on-write
// semantics) and will be modified to contain the converted result.
// Using read-only alias at first saves one copy operation if
// transliteration does not change the input, which is rather rare.
// Moreover, transliteration takes rather long so that saving one copy
// helps only a little bit.
converted.setTo(false, src, src_length);
ConvertCaseWithTransliterator(&converted, "el-Upper");
// If no change is made, just return |s|.
if (converted.getBuffer() == src) return *s;
}
RETURN_RESULT_OR_FAILURE(
isolate,
isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
reinterpret_cast<const uint16_t*>(converted.getBuffer()),
converted.length())));
}
auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
int32_t src_length = s->length();
int32_t dest_length = src_length;
UErrorCode status;
Handle<SeqTwoByteString> result;
......
......@@ -84,20 +84,22 @@ assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["en", "tr"]));
assertEquals("abci\u0307", "aBcI\u0307".toLowerCase());
// Greek uppercasing: not covered by intl402/String/*, yet. Tonos (U+0301) and
// other diacritic marks are dropped. This rule is based on the current CLDR's
// el-Upper transformation, but Greek uppercasing rules are more sophisticated
// than this. See http://bugs.icu-project.org/trac/ticket/10582 and
// http://unicode.org/cldr/trac/ticket/7905 .
// other diacritic marks are dropped. See
// http://bugs.icu-project.org/trac/ticket/5456#comment:19 for more examples.
// See also http://bugs.icu-project.org/trac/ticket/12845 .
assertEquals("Α", "α\u0301".toLocaleUpperCase("el"));
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-GR"));
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek"));
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek-GR"));
assertEquals("Α", "ά".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
assertEquals("ΑΟΫΩ", "άόύώ".toLocaleUpperCase("el"));
assertEquals("ΑΟΫΩ", "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("el"));
assertEquals("ΑΟΫΩ", "άόύώ".toLocaleUpperCase("el"));
assertEquals("ΟΕ", "Ό\u1f15".toLocaleUpperCase("el"));
assertEquals("ΟΕ", "Ο\u0301ε\u0314\u0301".toLocaleUpperCase("el"));
assertEquals("ΡΩΜΕΪΚΑ", "ρωμέικα".toLocaleUpperCase("el"));
assertEquals("ΜΑΪΟΥ, ΤΡΟΛΕΪ", "Μαΐου, τρόλεϊ".toLocaleUpperCase("el"));
assertEquals("ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.", "Το ένα ή το άλλο.".toLocaleUpperCase("el"));
// Input and output are identical.
assertEquals("αβγδε", "αβγδε".toLocaleLowerCase("el"));
......
......@@ -302,9 +302,6 @@
'built-ins/Function/prototype/toString/setter-object': [FAIL],
'built-ins/Function/prototype/toString/unicode': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=5012
# http://bugs.icu-project.org/trac/ticket/12671
'intl402/Intl/getCanonicalLocales/weird-cases': [FAIL],
# https://github.com/tc39/test262/issues/743
'intl402/Intl/getCanonicalLocales/main': [FAIL],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment