Commit ac9e6285 authored by jshin's avatar jshin Committed by Commit bot

Fix two DCHECK failures in ICU case mapping code

1.
DCHECK in runtime-i18n.cc for case mapping was wrong to
assume that the longest primary language tag is 3 characters.
BCP 47 actually allows up to 8 characters.

2. GetFlatContent() was called to a string without flattening it first.

BUG=680314,680464
TEST=intl/general/case-mapping (see also the bugs)

Review-Url: https://codereview.chromium.org/2629763003
Cr-Commit-Position: refs/heads/master@{#42343}
parent f8fd6ec3
......@@ -839,6 +839,7 @@ MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewRawTwoByteString(dest_length));
DisallowHeapAllocation no_gc;
DCHECK(s->IsFlat());
String::FlatContent flat = s->GetFlatContent();
const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
status = U_ZERO_ERROR;
......@@ -1004,6 +1005,7 @@ MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {
isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
DisallowHeapAllocation no_gc;
DCHECK(s->IsFlat());
String::FlatContent flat = s->GetFlatContent();
uint8_t* dest = result->GetChars();
if (flat.IsOneByte()) {
......@@ -1043,6 +1045,7 @@ MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {
Handle<SeqOneByteString> result =
isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
DCHECK(s->IsFlat());
int sharp_s_count;
bool is_result_single_byte;
{
......@@ -1128,8 +1131,11 @@ RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
CONVERT_ARG_HANDLE_CHECKED(String, lang_arg, 2);
DCHECK(lang_arg->length() <= 3);
// Primary language tag can be up to 8 characters long in theory.
// https://tools.ietf.org/html/bcp47#section-2.2.1
DCHECK(lang_arg->length() <= 8);
lang_arg = String::Flatten(lang_arg);
s = String::Flatten(s);
// All the languages requiring special-handling have two-letter codes.
if (V8_UNLIKELY(lang_arg->length() > 2))
......@@ -1142,7 +1148,6 @@ RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
c1 = lang.Get(0);
c2 = lang.Get(1);
}
s = String::Flatten(s);
// TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
// in the root locale needs to be adjusted for az, lt and tr because even case
// mapping of ASCII range characters are different in those locales.
......
......@@ -75,9 +75,26 @@ assertEquals("abcijkl",
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl",
("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl",
("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("fil"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLowerCase());
assertEquals("abci\u0307jkl",
("aB" + "cI" + "\u0307j" + "kl").toLowerCase());
assertEquals("[object arraybuffer]",
(new String(new ArrayBuffer())).toLocaleLowerCase("fil"));
assertEquals("[OBJECT ARRAYBUFFER]",
(new String(new ArrayBuffer())).toLocaleUpperCase("fil"));
assertEquals("abcde", ("a" + "b" + "cde").toLowerCase());
assertEquals("ABCDE", ("a" + "b" + "cde").toUpperCase());
assertEquals("abcde", ("a" + "b" + "cde").toLocaleLowerCase());
assertEquals("ABCDE", ("a" + "b" + "cde").toLocaleUpperCase());
assertEquals("abcde", ("a" + "b" + "cde").toLocaleLowerCase("en"));
assertEquals("ABCDE", ("a" + "b" + "cde").toLocaleUpperCase("en"));
assertEquals("abcde", ("a" + "b" + "cde").toLocaleLowerCase("fil"));
assertEquals("ABCDE", ("a" + "b" + "cde").toLocaleUpperCase("fil"));
assertEquals("abcde", ("a" + "b" + "cde").toLocaleLowerCase("longlang"));
assertEquals("ABCDE", ("a" + "b" + "cde").toLocaleUpperCase("longlang"));
// "tr" and "az" should behave identically.
assertEquals("aBcI\u0307".toLocaleLowerCase("tr"),
......@@ -103,9 +120,25 @@ assertEquals("άόύώ".toLocaleUpperCase([]),
// English/root locale keeps U+0307 (combining dot above).
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en-GB"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["en", "tr"]));
assertEquals("abci\u0307", "aBcI\u0307".toLowerCase());
// Anything other than 'tr' and 'az' behave like root for U+0307.
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("fil"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("zh-Hant-TW"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("i-klingon"));
// Up to 8 chars are allowed for the primary language tag in BCP 47.
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("longlang"));
assertEquals("ABCI\u0307", "aBcI\u0307".toLocaleUpperCase("longlang"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["longlang", "tr"]));
assertEquals("ABCI\u0307", "aBcI\u0307".toLocaleUpperCase(["longlang", "tr"]));
assertThrows(() => "abc".toLocaleLowerCase("longlang2"), RangeError);
assertThrows(() => "abc".toLocaleUpperCase("longlang2"), RangeError);
assertThrows(() => "abc".toLocaleLowerCase(["longlang2", "en"]), RangeError);
assertThrows(() => "abc".toLocaleUpperCase(["longlang2", "en"]), RangeError);
// Greek uppercasing: not covered by intl402/String/*, yet. Tonos (U+0301) and
// other diacritic marks are dropped. See
// http://bugs.icu-project.org/trac/ticket/5456#comment:19 for more examples.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment