Commit 69bd294a authored by Jungshik Shin, committed by Commit Bot

Correct the misuse of uloc_{to,from}LanguageTag

- remove unused Runtime_GetLanguageTagVariants
- add test for another related bug (chromium:770452) as well as for 
chromium:770450 . 

Bug: chromium:770450, chromium:770452
Test: intl/general/invalid-locale.js
Cq-Include-Trybots: master.tryserver.v8:v8_linux_noi18n_rel_ng
Change-Id: I4496a4a5421000faa0e37aed85fea21ceb487998
Reviewed-on: https://chromium-review.googlesource.com/710816
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Jungshik Shin <jshin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48483}
parent e29fd74c
......@@ -68,18 +68,21 @@ RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
v8::String::Utf8Value locale_id(v8_isolate,
v8::Utils::ToLocal(locale_id_str));
// TODO(jshin): uloc_{for,to}LanguageTag can fail even for a structurally valid
// language tag if it's too long (much longer than 100 chars). Even if we
// allocate a longer buffer, ICU will still fail if it's too long. Either
// propose to Ecma 402 to put a limit on the locale length or change ICU to
// handle long locale names better. See
// https://ssl.icu-project.org/trac/ticket/13417 .
// Return value which denotes invalid language tag.
// TODO(jshin): Can uloc_{for,to}LanguageTag fail even for structurally valid
// language tags? If not, just add CHECK instead of returning 'invalid-tag'.
const char* const kInvalidTag = "invalid-tag";
UErrorCode error = U_ZERO_ERROR;
char icu_result[ULOC_FULLNAME_CAPACITY];
int icu_length = 0;
uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY,
&icu_length, &error);
if (U_FAILURE(error) || icu_length == 0) {
uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY, nullptr,
&error);
if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
return *factory->NewStringFromAsciiChecked(kInvalidTag);
}
......@@ -88,7 +91,7 @@ RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
// Force strict BCP47 rules.
uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error);
if (U_FAILURE(error)) {
if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
return *factory->NewStringFromAsciiChecked(kInvalidTag);
}
......@@ -134,7 +137,7 @@ RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) {
error = U_ZERO_ERROR;
// No need to force strict BCP47 rules.
uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
if (U_FAILURE(error)) {
if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
// This shouldn't happen, but lets not break the user.
continue;
}
......@@ -173,91 +176,6 @@ RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) {
return *factory->NewStringFromStaticChars("und");
}
// Runtime function taking one argument, a JSArray of BCP47 language tags.
// Returns a JSArray with one object per input tag, each carrying two string
// properties:
//   "maximized": the tag with likely subtags added and extensions removed.
//   "base":      the tag with extensions removed.
// Returns isolate->ThrowIllegalOperation() when the array has 100 or more
// elements, and throws the illegal-argument string when an element is not a
// string or when any ICU conversion fails or does not fit its buffer.
RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
  HandleScope scope(isolate);
  v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
  Factory* factory = isolate->factory();

  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0);

  uint32_t length = static_cast<uint32_t>(input->length()->Number());
  // Set some limit to prevent fuzz tests from going OOM.
  // Can be bumped when callers' requirements change.
  if (length >= 100) return isolate->ThrowIllegalOperation();

  // U_FAILURE() does not cover U_STRING_NOT_TERMINATED_WARNING, which ICU
  // reports when the output fills the buffer without room for the trailing
  // NUL. Such a buffer must not be handed to anything expecting a C string.
  // A later ICU call may overwrite the status, so this has to be evaluated
  // after every call rather than once at the end.
  auto icu_output_unusable = [](UErrorCode status) {
    return U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING;
  };

  Handle<FixedArray> output = factory->NewFixedArray(length);
  Handle<Name> maximized = factory->NewStringFromStaticChars("maximized");
  Handle<Name> base = factory->NewStringFromStaticChars("base");
  for (unsigned int i = 0; i < length; ++i) {
    Handle<Object> locale_id;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, locale_id, JSReceiver::GetElement(isolate, input, i));
    if (!locale_id->IsString()) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    v8::String::Utf8Value utf8_locale_id(
        v8_isolate, v8::Utils::ToLocal(Handle<String>::cast(locale_id)));

    UErrorCode error = U_ZERO_ERROR;

    // Convert from BCP47 to ICU format.
    // de-DE-u-co-phonebk -> de_DE@collation=phonebook
    char icu_locale[ULOC_FULLNAME_CAPACITY];
    // Receives the number of characters of the input tag that ICU parsed
    // (not the length of icu_locale); zero means nothing was recognized.
    int icu_locale_length = 0;
    uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY,
                        &icu_locale_length, &error);
    if (icu_output_unusable(error) || icu_locale_length == 0) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Maximize the locale.
    // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook
    char icu_max_locale[ULOC_FULLNAME_CAPACITY];
    uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY,
                          &error);
    if (icu_output_unusable(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Remove extensions from maximized locale.
    // de_Latn_DE@collation=phonebook -> de_Latn_DE
    char icu_base_max_locale[ULOC_FULLNAME_CAPACITY];
    uloc_getBaseName(icu_max_locale, icu_base_max_locale,
                     ULOC_FULLNAME_CAPACITY, &error);
    if (icu_output_unusable(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Get original name without extensions.
    // de_DE@collation=phonebook -> de_DE
    char icu_base_locale[ULOC_FULLNAME_CAPACITY];
    uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY,
                     &error);
    if (icu_output_unusable(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Convert from ICU locale format to BCP47 format.
    // de_Latn_DE -> de-Latn-DE
    char base_max_locale[ULOC_FULLNAME_CAPACITY];
    uloc_toLanguageTag(icu_base_max_locale, base_max_locale,
                       ULOC_FULLNAME_CAPACITY, FALSE, &error);
    if (icu_output_unusable(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // de_DE -> de-DE
    char base_locale[ULOC_FULLNAME_CAPACITY];
    uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY,
                       FALSE, &error);
    if (icu_output_unusable(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Both buffers are known to be NUL-terminated at this point.
    Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
    Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale);
    JSObject::AddProperty(result, maximized, value, NONE);
    value = factory->NewStringFromAsciiChecked(base_locale);
    JSObject::AddProperty(result, base, value, NONE);
    output->set(i, *result);
  }

  Handle<JSArray> result = factory->NewJSArrayWithElements(output);
  result->set_length(Smi::FromInt(length));
  return *result;
}
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
HandleScope scope(isolate);
......
......@@ -253,7 +253,6 @@ namespace internal {
F(CanonicalizeLanguageTag, 1, 1) \
F(AvailableLocalesOf, 1, 1) \
F(GetDefaultICULocale, 0, 1) \
F(GetLanguageTagVariants, 1, 1) \
F(IsInitializedIntlObject, 1, 1) \
F(IsInitializedIntlObjectOfType, 2, 1) \
F(MarkAsInitializedIntlObjectOfType, 2, 1) \
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Every invalid locale listed here must make the Intl.NumberFormat
// constructor throw a RangeError.
var invalid_locales = [
  "arcdefghl-Latn",
  "fil-Latn-kxx",
  "fr-Latn-CAK",
  "en-Latin-US",
  "en-a-foo-9charlong",
  "en-a-b",
];
invalid_locales.forEach(function(tag) {
  // assertThrows receives the snippet as source text.
  var snippet = "var nf = new Intl.NumberFormat('" + tag + "')";
  assertThrows(snippet, RangeError);
});
// Long locale names that are still expected to be accepted: constructing a
// NumberFormat must succeed and resolve to the "latn" numbering system.
var not_so_long_locales = [
"bs-u-nu-bzcu-cab-cabs-avnlubs-avnihu-zcu-cab-cbs-avnllubs-avnihq-zcu-cab-cbs-ubs-avnihu-cabs-flus-xxd-vnluy",
"bs-u-nu-bzcu-cab-cabs-avnlubs-avnihu-zcu-cab-cbs-avnllubs-avnihq-zcu-cab-cbs-ubs-avnihu-cabs-flus-xxd",
"bs-u-nu-bzcu-cab-cabs-avnlubs-avnihu-zcu",
];
for (let locale of not_so_long_locales) {
  const resolved = (new Intl.NumberFormat(locale)).resolvedOptions();
  // mjsunit's assertEquals takes the expected value first and the found value
  // second, so a failure message reports them the right way round.
  assertEquals("latn", resolved.numberingSystem);
}
// The point of this test is to make sure that an overlong locale name has no
// ill effect. Neither Ecma 402 nor BCP 47 (RFC 5646 section 2.1) puts a limit
// on the length of a locale ID, so treating these names as invalid is a spec
// violation; RangeError is nevertheless what is asserted below. See the
// TODO(jshin) comment in Runtime_CanonicalizeLanguageTag in runtime-intl.cc .
var overlong_locales = [
"he-up-a-caiaup-araup-ai-pdu-sp-bs-up-arscna-zeieiaup-araup-arscia-rews-us-up-arscna-zeieiaup-araup-arsciap-arscna-zeieiaup-araup-arscie-u-sp-bs-uaup-arscia",
"he-up-a-caiaup-araup-ai-pdu-sp-bs-up-arscna-zeieiaup-araup-arscia-rews-us-up-arscna-zeieiaup-araup-arsciap-arscna-zeieiaup-araup-arscie-u-sp-bs-uaup-arscia-xyza",
"bs-u-nu-bzcu-cab-cabs-avnlubs-avnihu-zcu-cab-cbs-avnllubs-avnihq-zcu-cab-cbs-ubs-avnihu-cabs-flus-xxd-vnluy-abcd",
];
overlong_locales.forEach(function(name) {
  var code = "var nf = new Intl.NumberFormat('" + name + "')";
  assertThrows(code, RangeError);
});
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment