Commit 8d094249 authored by Frank Tang, committed by Commit Bot

[Intl] Validate u extension type

Fix intl402/*/ignore-invalid-unicode-ext-values
Add tests for other valid/invalid -u- ext values.

Bug: v8:7481
Change-Id: I429effd071bb03599a1e767bb2a9e9918a91b850
Reviewed-on: https://chromium-review.googlesource.com/c/1351307
Commit-Queue: Frank Tang <ftang@chromium.org>
Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org>
Cr-Commit-Position: refs/heads/master@{#58173}
parent 29a970a2
...@@ -2056,6 +2056,7 @@ void JSCollator::JSCollatorVerify(Isolate* isolate) { ...@@ -2056,6 +2056,7 @@ void JSCollator::JSCollatorVerify(Isolate* isolate) {
JSObjectVerify(isolate); JSObjectVerify(isolate);
VerifyObjectField(isolate, kICUCollatorOffset); VerifyObjectField(isolate, kICUCollatorOffset);
VerifyObjectField(isolate, kBoundCompareOffset); VerifyObjectField(isolate, kBoundCompareOffset);
VerifyObjectField(isolate, kLocaleOffset);
} }
void JSDateTimeFormat::JSDateTimeFormatVerify(Isolate* isolate) { void JSDateTimeFormat::JSDateTimeFormatVerify(Isolate* isolate) {
......
...@@ -2028,6 +2028,7 @@ void JSCollator::JSCollatorPrint(std::ostream& os) { // NOLINT ...@@ -2028,6 +2028,7 @@ void JSCollator::JSCollatorPrint(std::ostream& os) { // NOLINT
JSObjectPrintHeader(os, *this, "JSCollator"); JSObjectPrintHeader(os, *this, "JSCollator");
os << "\n - icu collator: " << Brief(icu_collator()); os << "\n - icu collator: " << Brief(icu_collator());
os << "\n - bound compare: " << Brief(bound_compare()); os << "\n - bound compare: " << Brief(bound_compare());
os << "\n - locale: " << Brief(locale());
JSObjectPrintBody(os, *this); JSObjectPrintBody(os, *this);
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "src/string-case.h" #include "src/string-case.h"
#include "unicode/basictz.h" #include "unicode/basictz.h"
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/calendar.h"
#include "unicode/coll.h" #include "unicode/coll.h"
#include "unicode/decimfmt.h" #include "unicode/decimfmt.h"
#include "unicode/locid.h" #include "unicode/locid.h"
...@@ -511,24 +512,12 @@ bool RemoveLocaleScriptTag(const std::string& icu_locale, ...@@ -511,24 +512,12 @@ bool RemoveLocaleScriptTag(const std::string& icu_locale,
std::set<std::string> Intl::BuildLocaleSet( std::set<std::string> Intl::BuildLocaleSet(
const icu::Locale* icu_available_locales, int32_t count) { const icu::Locale* icu_available_locales, int32_t count) {
std::set<std::string> locales; std::set<std::string> locales;
UErrorCode error = U_ZERO_ERROR;
char result[ULOC_FULLNAME_CAPACITY];
for (int32_t i = 0; i < count; ++i) { for (int32_t i = 0; i < count; ++i) {
const char* icu_name = icu_available_locales[i].getName(); std::string locale = Intl::ToLanguageTag(icu_available_locales[i]);
error = U_ZERO_ERROR;
// No need to force strict BCP47 rules.
uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
// This shouldn't happen, but lets not break the user.
continue;
}
std::string locale(result);
locales.insert(locale); locales.insert(locale);
std::string shortened_locale; std::string shortened_locale;
if (RemoveLocaleScriptTag(icu_name, &shortened_locale)) { if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-'); std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
locales.insert(shortened_locale); locales.insert(shortened_locale);
} }
...@@ -537,6 +526,27 @@ std::set<std::string> Intl::BuildLocaleSet( ...@@ -537,6 +526,27 @@ std::set<std::string> Intl::BuildLocaleSet(
return locales; return locales;
} }
// Converts |locale| to a BCP 47 language tag via icu::Locale::toLanguageTag
// and strips the redundant "true" type that ICU emits for Unicode extension
// keywords (e.g. "en-u-kn-true" becomes "en-u-kn").
//
// Crashes via CHECK if ICU reports a failure while building the tag.
std::string Intl::ToLanguageTag(const icu::Locale& locale) {
  UErrorCode status = U_ZERO_ERROR;
  std::string res = locale.toLanguageTag<std::string>(status);
  CHECK(U_SUCCESS(status));

  // Hack to remove -true from unicode extensions
  // Address https://crbug.com/v8/8565
  // TODO(ftang): Move the following "remove true" logic into ICU toLanguageTag
  // by fixing ICU-20310.
  const std::string kSepTrue = "-true";
  const size_t u_ext_start = res.find("-u-");
  if (u_ext_start == std::string::npos) return res;

  // Start after the first key of the extension and never search before that
  // point again, so subtags that precede "-u-" are left untouched.
  size_t sep_true =
      res.find(kSepTrue, u_ext_start + 5 /* strlen("-u-xx") == 5 */);
  while (sep_true != std::string::npos) {
    const size_t match_end = sep_true + kSepTrue.length();
    if (match_end == res.length() || res[match_end] == '-') {
      // The match is a complete "true" subtag: drop it. The text that
      // followed it now starts at |sep_true|, so resume the search there.
      res.erase(sep_true, kSepTrue.length());
    } else {
      // "-true" is only the prefix of a longer subtag (e.g. "-truexyz");
      // keep it and continue after the match.
      sep_true = match_end;
    }
    sep_true = res.find(kSepTrue, sep_true);
  }
  return res;
}
namespace { namespace {
std::string DefaultLocale(Isolate* isolate) { std::string DefaultLocale(Isolate* isolate) {
if (isolate->default_locale().empty()) { if (isolate->default_locale().empty()) {
...@@ -546,13 +556,9 @@ std::string DefaultLocale(Isolate* isolate) { ...@@ -546,13 +556,9 @@ std::string DefaultLocale(Isolate* isolate) {
isolate->set_default_locale("en-US"); isolate->set_default_locale("en-US");
} else { } else {
// Set the locale // Set the locale
char result[ULOC_FULLNAME_CAPACITY]; isolate->set_default_locale(default_locale.isBogus()
UErrorCode status = U_ZERO_ERROR; ? "und"
int32_t length = : Intl::ToLanguageTag(default_locale));
uloc_toLanguageTag(default_locale.getName(), result,
ULOC_FULLNAME_CAPACITY, FALSE, &status);
isolate->set_default_locale(
U_SUCCESS(status) ? std::string(result, length) : "und");
} }
DCHECK(!isolate->default_locale().empty()); DCHECK(!isolate->default_locale().empty());
} }
...@@ -758,29 +764,12 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate, ...@@ -758,29 +764,12 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
// propose to Ecma 402 to put a limit on the locale length or change ICU to // propose to Ecma 402 to put a limit on the locale length or change ICU to
// handle long locale names better. See // handle long locale names better. See
// https://unicode-org.atlassian.net/browse/ICU-13417 // https://unicode-org.atlassian.net/browse/ICU-13417
UErrorCode error = U_ZERO_ERROR; // forLanguageTag checks the structrual validity. If the input BCP47
char icu_result[ULOC_FULLNAME_CAPACITY];
// uloc_forLanguageTag checks the structrual validity. If the input BCP47
// language tag is parsed all the way to the end, it indicates that the input // language tag is parsed all the way to the end, it indicates that the input
// is structurally valid. Due to a couple of bugs, we can't use it // is structurally valid. Due to a couple of bugs, we can't use it
// without Chromium patches or ICU 62 or earlier. // without Chromium patches or ICU 62 or earlier.
int parsed_length; UErrorCode error = U_ZERO_ERROR;
uloc_forLanguageTag(locale.c_str(), icu_result, ULOC_FULLNAME_CAPACITY, icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
&parsed_length, &error);
if (U_FAILURE(error) ||
static_cast<size_t>(parsed_length) < locale.length() ||
error == U_STRING_NOT_TERMINATED_WARNING) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
Nothing<std::string>());
}
// Force strict BCP47 rules.
char result[ULOC_FULLNAME_CAPACITY];
int32_t result_len = uloc_toLanguageTag(icu_result, result,
ULOC_FULLNAME_CAPACITY, TRUE, &error);
if (U_FAILURE(error)) { if (U_FAILURE(error)) {
THROW_NEW_ERROR_RETURN_VALUE( THROW_NEW_ERROR_RETURN_VALUE(
isolate, isolate,
...@@ -788,7 +777,7 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate, ...@@ -788,7 +777,7 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
Nothing<std::string>()); Nothing<std::string>());
} }
return Just(std::string(result, result_len)); return Just(Intl::ToLanguageTag(icu_locale));
} }
Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList( Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
...@@ -1454,6 +1443,44 @@ MaybeHandle<JSObject> Intl::SupportedLocalesOf( ...@@ -1454,6 +1443,44 @@ MaybeHandle<JSObject> Intl::SupportedLocalesOf(
} }
namespace { namespace {
// Returns true iff |value|, once mapped to its legacy ICU type for |key|,
// is one of the keyword values that T::getKeywordValuesForLocale reports for
// the base name (i.e. without keywords) of |locale|.
//
// T is an ICU service class providing a static getKeywordValuesForLocale,
// such as icu::Calendar or icu::Collator. Any ICU failure, or a |value| that
// has no legacy mapping, yields false.
template <typename T>
bool IsValidExtension(const icu::Locale& locale, const char* key,
                      const std::string& value) {
  // uloc_toLegacyType returns nullptr when |value| cannot be mapped to a
  // well-formed legacy type. Constructing a std::string from nullptr is
  // undefined behavior, so reject such values up front.
  const char* legacy_type_cstr = uloc_toLegacyType(key, value.c_str());
  if (legacy_type_cstr == nullptr) return false;
  const std::string legacy_type(legacy_type_cstr);

  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::StringEnumeration> enumeration(
      T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
                                   false, status));
  if (U_FAILURE(status) || enumeration == nullptr) return false;

  int32_t length;
  for (const char* item = enumeration->next(&length, status);
       U_SUCCESS(status) && item != nullptr;
       item = enumeration->next(&length, status)) {
    if (legacy_type == item) return true;
  }
  return false;
}
// Checks |value| against the "calendar" keyword values that icu::Calendar
// reports for |locale|.
bool IsValidCalendar(const icu::Locale& locale, const std::string& value) {
  const char* const legacy_key = "calendar";
  return IsValidExtension<icu::Calendar>(locale, legacy_key, value);
}
// Checks |value| against the "collation" keyword values that icu::Collator
// reports for |locale|. "standard" and "search" are always rejected, without
// consulting ICU.
bool IsValidCollation(const icu::Locale& locale, const std::string& value) {
  const bool is_excluded = (value == "standard") || (value == "search");
  if (is_excluded) {
    return false;
  }
  return IsValidExtension<icu::Collator>(locale, "collation", value);
}
bool IsValidNumberingSystem(const std::string& value) {
std::set<std::string> invalid_values = {"native", "traditio", "finance"};
if (invalid_values.find(value) != invalid_values.end()) return false;
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::NumberingSystem> numbering_system(
icu::NumberingSystem::createInstanceByName(value.c_str(), status));
return U_SUCCESS(status) && numbering_system.get() != nullptr;
}
std::map<std::string, std::string> LookupAndValidateUnicodeExtensions( std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) { icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
...@@ -1493,13 +1520,40 @@ std::map<std::string, std::string> LookupAndValidateUnicodeExtensions( ...@@ -1493,13 +1520,40 @@ std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) { if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value); const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
extensions.insert( bool is_valid_value = false;
std::pair<std::string, std::string>(bcp47_key, bcp47_value)); // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
} else { if (strcmp("ca", bcp47_key) == 0) {
status = U_ZERO_ERROR; is_valid_value = IsValidCalendar(*icu_locale, bcp47_value);
icu_locale->setKeywordValue(keyword, nullptr, status); } else if (strcmp("co", bcp47_key) == 0) {
CHECK(U_SUCCESS(status)); is_valid_value = IsValidCollation(*icu_locale, bcp47_value);
} else if (strcmp("hc", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("lb", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
std::set<std::string> valid_values = {"strict", "normal", "loose"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("kn", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
std::set<std::string> valid_values = {"true", "false"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("kf", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
std::set<std::string> valid_values = {"upper", "lower", "false"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("nu", bcp47_key) == 0) {
is_valid_value = IsValidNumberingSystem(bcp47_value);
}
if (is_valid_value) {
extensions.insert(
std::pair<std::string, std::string>(bcp47_key, bcp47_value));
continue;
}
} }
status = U_ZERO_ERROR;
icu_locale->setKeywordValue(keyword, nullptr, status);
CHECK(U_SUCCESS(status));
} }
return extensions; return extensions;
...@@ -1581,11 +1635,7 @@ Intl::ResolvedLocale Intl::ResolveLocale( ...@@ -1581,11 +1635,7 @@ Intl::ResolvedLocale Intl::ResolveLocale(
std::map<std::string, std::string> extensions = std::map<std::string, std::string> extensions =
LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys); LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
char canonicalized_locale[ULOC_FULLNAME_CAPACITY]; std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale);
UErrorCode status = U_ZERO_ERROR;
uloc_toLanguageTag(icu_locale.getName(), canonicalized_locale,
ULOC_FULLNAME_CAPACITY, true, &status);
CHECK(U_SUCCESS(status));
// TODO(gsathya): Remove privateuse subtags from extensions. // TODO(gsathya): Remove privateuse subtags from extensions.
......
...@@ -51,6 +51,8 @@ class Intl { ...@@ -51,6 +51,8 @@ class Intl {
static std::set<std::string> BuildLocaleSet( static std::set<std::string> BuildLocaleSet(
const icu::Locale* icu_available_locales, int32_t count); const icu::Locale* icu_available_locales, int32_t count);
static std::string ToLanguageTag(const icu::Locale& locale);
// Get the name of the numbering system from locale. // Get the name of the numbering system from locale.
// ICU doesn't expose numbering system in any way, so we have to assume that // ICU doesn't expose numbering system in any way, so we have to assume that
// for given locale NumberingSystem constructor produces the same digits as // for given locale NumberingSystem constructor produces the same digits as
......
...@@ -22,6 +22,7 @@ OBJECT_CONSTRUCTORS_IMPL(JSCollator, JSObject) ...@@ -22,6 +22,7 @@ OBJECT_CONSTRUCTORS_IMPL(JSCollator, JSObject)
ACCESSORS(JSCollator, icu_collator, Managed<icu::Collator>, kICUCollatorOffset) ACCESSORS(JSCollator, icu_collator, Managed<icu::Collator>, kICUCollatorOffset)
ACCESSORS(JSCollator, bound_compare, Object, kBoundCompareOffset); ACCESSORS(JSCollator, bound_compare, Object, kBoundCompareOffset);
ACCESSORS2(JSCollator, locale, String, kLocaleOffset)
CAST_ACCESSOR2(JSCollator); CAST_ACCESSOR2(JSCollator);
......
...@@ -60,13 +60,6 @@ void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options, ...@@ -60,13 +60,6 @@ void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
.FromJust()); .FromJust());
} }
void toLanguageTag(const icu::Locale& locale, char* tag) {
UErrorCode status = U_ZERO_ERROR;
uloc_toLanguageTag(locale.getName(), tag, ULOC_FULLNAME_CAPACITY, FALSE,
&status);
CHECK(U_SUCCESS(status));
}
} // anonymous namespace } // anonymous namespace
// static // static
...@@ -135,6 +128,8 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate, ...@@ -135,6 +128,8 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
Handle<String> locale = Handle<String>(collator->locale(), isolate);
icu::Locale icu_locale(icu_collator->getLocale(ULOC_VALID_LOCALE, status)); icu::Locale icu_locale(icu_collator->getLocale(ULOC_VALID_LOCALE, status));
CHECK(U_SUCCESS(status)); CHECK(U_SUCCESS(status));
...@@ -144,7 +139,6 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate, ...@@ -144,7 +139,6 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
const char* legacy_collation_key = uloc_toLegacyKey(collation_key); const char* legacy_collation_key = uloc_toLegacyKey(collation_key);
DCHECK_NOT_NULL(legacy_collation_key); DCHECK_NOT_NULL(legacy_collation_key);
char bcp47_locale_tag[ULOC_FULLNAME_CAPACITY];
char legacy_collation_value[ULOC_FULLNAME_CAPACITY]; char legacy_collation_value[ULOC_FULLNAME_CAPACITY];
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
int32_t length = int32_t length =
...@@ -152,37 +146,13 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate, ...@@ -152,37 +146,13 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
ULOC_FULLNAME_CAPACITY, status); ULOC_FULLNAME_CAPACITY, status);
if (length > 0 && U_SUCCESS(status)) { if (length > 0 && U_SUCCESS(status)) {
const char* collation_value = collation = uloc_toUnicodeLocaleType(collation_key, legacy_collation_value);
uloc_toUnicodeLocaleType(collation_key, legacy_collation_value); CHECK_NOT_NULL(collation);
CHECK_NOT_NULL(collation_value);
if (strcmp(collation_value, "search") == 0) { if (strcmp(collation, "search") == 0) {
usage = "search"; usage = "search";
// Search is disallowed as a collation value per spec. Let's
// use `default`, instead.
//
// https://tc39.github.io/ecma402/#sec-properties-of-intl-collator-instances
collation = "default"; collation = "default";
// We clone the icu::Locale because we don't want the
// icu_collator to be affected when we remove the collation key
// below.
icu::Locale new_icu_locale = icu_locale;
// The spec forbids the search as a collation value in the
// locale tag, so let's filter it out.
status = U_ZERO_ERROR;
new_icu_locale.setKeywordValue(legacy_collation_key, nullptr, status);
CHECK(U_SUCCESS(status));
toLanguageTag(new_icu_locale, bcp47_locale_tag);
} else {
collation = collation_value;
toLanguageTag(icu_locale, bcp47_locale_tag);
} }
} else {
toLanguageTag(icu_locale, bcp47_locale_tag);
} }
// 5. For each row of Table 2, except the header row, in table order, do // 5. For each row of Table 2, except the header row, in table order, do
...@@ -196,8 +166,11 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate, ...@@ -196,8 +166,11 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
// [[Collation]] "collation" // [[Collation]] "collation"
// [[Numeric]] "numeric" kn // [[Numeric]] "numeric" kn
// [[CaseFirst]] "caseFirst" kf // [[CaseFirst]] "caseFirst" kf
CreateDataPropertyForOptions( CHECK(JSReceiver::CreateDataProperty(isolate, options,
isolate, options, isolate->factory()->locale_string(), bcp47_locale_tag); isolate->factory()->locale_string(),
locale, kDontThrow)
.FromJust());
CreateDataPropertyForOptions(isolate, options, CreateDataPropertyForOptions(isolate, options,
isolate->factory()->usage_string(), usage); isolate->factory()->usage_string(), usage);
CreateDataPropertyForOptions( CreateDataPropertyForOptions(
...@@ -235,6 +208,14 @@ UColAttributeValue ToUColAttributeValue(Intl::CaseFirst case_first) { ...@@ -235,6 +208,14 @@ UColAttributeValue ToUColAttributeValue(Intl::CaseFirst case_first) {
} }
} }
// Turns numeric collation (UCOL_NUMERIC_COLLATION) on or off on
// |icu_collator| according to |numeric|. |icu_collator| must be non-null;
// crashes via CHECK if ICU rejects the attribute.
void SetNumericOption(icu::Collator* icu_collator, bool numeric) {
  CHECK_NOT_NULL(icu_collator);
  const UColAttributeValue numeric_setting = numeric ? UCOL_ON : UCOL_OFF;
  UErrorCode status = U_ZERO_ERROR;
  icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, numeric_setting, status);
  CHECK(U_SUCCESS(status));
}
void SetCaseFirstOption(icu::Collator* icu_collator, void SetCaseFirstOption(icu::Collator* icu_collator,
Intl::CaseFirst case_first) { Intl::CaseFirst case_first) {
CHECK_NOT_NULL(icu_collator); CHECK_NOT_NULL(icu_collator);
...@@ -325,32 +306,14 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate, ...@@ -325,32 +306,14 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
requested_locales, matcher, relevant_extension_keys); requested_locales, matcher, relevant_extension_keys);
// 18. Set collator.[[Locale]] to r.[[locale]]. // 18. Set collator.[[Locale]] to r.[[locale]].
Handle<String> locale_str =
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
collator->set_locale(*locale_str);
icu::Locale icu_locale = r.icu_locale; icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus()); DCHECK(!icu_locale.isBogus());
std::map<std::string, std::string> extensions = r.extensions;
// 19. Let collation be r.[[co]]. // 19. Let collation be r.[[co]].
//
// r.[[co]] is already set as part of the icu::Locale creation as
// icu parses unicode extensions and sets the keywords.
//
// We need to sanitize the keywords based on certain ECMAScript rules.
//
// As per https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots:
// The values "standard" and "search" must not be used as elements
// in any [[SortLocaleData]][locale].co and
// [[SearchLocaleData]][locale].co list.
auto co_extension_it = extensions.find("co");
if (co_extension_it != extensions.end()) {
const std::string& value = co_extension_it->second;
if ((value == "search") || (value == "standard")) {
UErrorCode status = U_ZERO_ERROR;
const char* key = uloc_toLegacyKey("co");
icu_locale.setKeywordValue(key, nullptr, status);
CHECK(U_SUCCESS(status));
}
}
// 5. Set collator.[[Usage]] to usage. // 5. Set collator.[[Usage]] to usage.
// //
...@@ -410,19 +373,11 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate, ...@@ -410,19 +373,11 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
// passed in through the unicode extensions. // passed in through the unicode extensions.
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
if (found_numeric.FromJust()) { if (found_numeric.FromJust()) {
icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, SetNumericOption(icu_collator.get(), numeric);
numeric ? UCOL_ON : UCOL_OFF, status);
CHECK(U_SUCCESS(status));
} else { } else {
auto kn_extension_it = extensions.find("kn"); auto kn_extension_it = r.extensions.find("kn");
if (kn_extension_it != extensions.end()) { if (kn_extension_it != r.extensions.end()) {
const std::string& value = kn_extension_it->second; SetNumericOption(icu_collator.get(), (kn_extension_it->second == "true"));
numeric = (value == "true");
icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
numeric ? UCOL_ON : UCOL_OFF, status);
CHECK(U_SUCCESS(status));
} }
} }
...@@ -435,10 +390,10 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate, ...@@ -435,10 +390,10 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
if (case_first != Intl::CaseFirst::kUndefined) { if (case_first != Intl::CaseFirst::kUndefined) {
SetCaseFirstOption(icu_collator.get(), case_first); SetCaseFirstOption(icu_collator.get(), case_first);
} else { } else {
auto kf_extension_it = extensions.find("kf"); auto kf_extension_it = r.extensions.find("kf");
if (kf_extension_it != extensions.end()) { if (kf_extension_it != r.extensions.end()) {
const std::string& value = kf_extension_it->second; SetCaseFirstOption(icu_collator.get(),
SetCaseFirstOption(icu_collator.get(), ToCaseFirst(value.c_str())); ToCaseFirst(kf_extension_it->second.c_str()));
} }
} }
......
...@@ -50,6 +50,7 @@ class JSCollator : public JSObject { ...@@ -50,6 +50,7 @@ class JSCollator : public JSObject {
#define JS_COLLATOR_FIELDS(V) \ #define JS_COLLATOR_FIELDS(V) \
V(kICUCollatorOffset, kTaggedSize) \ V(kICUCollatorOffset, kTaggedSize) \
V(kBoundCompareOffset, kTaggedSize) \ V(kBoundCompareOffset, kTaggedSize) \
V(kLocaleOffset, kTaggedSize) \
/* Total size. */ \ /* Total size. */ \
V(kSize, 0) V(kSize, 0)
...@@ -58,6 +59,7 @@ class JSCollator : public JSObject { ...@@ -58,6 +59,7 @@ class JSCollator : public JSObject {
DECL_ACCESSORS(icu_collator, Managed<icu::Collator>) DECL_ACCESSORS(icu_collator, Managed<icu::Collator>)
DECL_ACCESSORS(bound_compare, Object); DECL_ACCESSORS(bound_compare, Object);
DECL_ACCESSORS2(locale, String)
OBJECT_CONSTRUCTORS(JSCollator, JSObject); OBJECT_CONSTRUCTORS(JSCollator, JSObject);
}; };
......
...@@ -846,10 +846,7 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize( ...@@ -846,10 +846,7 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize(
// ecma402/#sec-intl.datetimeformat-internal-slots // ecma402/#sec-intl.datetimeformat-internal-slots
// The value of the [[RelevantExtensionKeys]] internal slot is // The value of the [[RelevantExtensionKeys]] internal slot is
// « "ca", "nu", "hc" ». // « "ca", "nu", "hc" ».
// std::set<std::string> relevant_extension_keys = {"nu", "ca", "hc"};
// TODO(ftang): Add "hc" to this list of keys:
// https://bugs.chromium.org/p/v8/issues/detail?id=7482
std::set<std::string> relevant_extension_keys = {"nu", "ca"};
// 10. Let localeData be %DateTimeFormat%.[[LocaleData]]. // 10. Let localeData be %DateTimeFormat%.[[LocaleData]].
// 11. Let r be ResolveLocale( %DateTimeFormat%.[[AvailableLocales]], // 11. Let r be ResolveLocale( %DateTimeFormat%.[[AvailableLocales]],
...@@ -860,8 +857,6 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize( ...@@ -860,8 +857,6 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize(
isolate, JSDateTimeFormat::GetAvailableLocales(), requested_locales, isolate, JSDateTimeFormat::GetAvailableLocales(), requested_locales,
locale_matcher, relevant_extension_keys); locale_matcher, relevant_extension_keys);
// TODO(ftang): Make sure that "nu" key doesn't have "native",
// "traditio" or "finance" values.
icu::Locale icu_locale = r.icu_locale; icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus()); DCHECK(!icu_locale.isBogus());
......
...@@ -263,26 +263,6 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize( ...@@ -263,26 +263,6 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize(
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str()); isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
number_format->set_locale(*locale_str); number_format->set_locale(*locale_str);
icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus());
std::map<std::string, std::string> extensions = r.extensions;
// The list that is the value of the "nu" field of any locale field of
// [[LocaleData]] must not include the values "native", "traditio", or
// "finance".
//
// See https://tc39.github.io/ecma402/#sec-intl.numberformat-internal-slots
if (extensions.find("nu") != extensions.end()) {
const std::string value = extensions.at("nu");
if (value == "native" || value == "traditio" || value == "finance") {
// 10. Set numberFormat.[[NumberingSystem]] to r.[[nu]].
UErrorCode status = U_ZERO_ERROR;
icu_locale.setKeywordValue("nu", nullptr, status);
CHECK(U_SUCCESS(status));
}
}
// 11. Let dataLocale be r.[[dataLocale]]. // 11. Let dataLocale be r.[[dataLocale]].
// //
// 12. Let style be ? GetOption(options, "style", "string", « "decimal", // 12. Let style be ? GetOption(options, "style", "string", « "decimal",
...@@ -356,20 +336,20 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize( ...@@ -356,20 +336,20 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize(
std::unique_ptr<icu::NumberFormat> icu_number_format; std::unique_ptr<icu::NumberFormat> icu_number_format;
if (style == Style::DECIMAL) { if (style == Style::DECIMAL) {
icu_number_format.reset( icu_number_format.reset(
icu::NumberFormat::createInstance(icu_locale, status)); icu::NumberFormat::createInstance(r.icu_locale, status));
} else if (style == Style::PERCENT) { } else if (style == Style::PERCENT) {
icu_number_format.reset( icu_number_format.reset(
icu::NumberFormat::createPercentInstance(icu_locale, status)); icu::NumberFormat::createPercentInstance(r.icu_locale, status));
} else { } else {
DCHECK_EQ(style, Style::CURRENCY); DCHECK_EQ(style, Style::CURRENCY);
icu_number_format.reset( icu_number_format.reset(
icu::NumberFormat::createInstance(icu_locale, format_style, status)); icu::NumberFormat::createInstance(r.icu_locale, format_style, status));
} }
if (U_FAILURE(status) || icu_number_format.get() == nullptr) { if (U_FAILURE(status) || icu_number_format.get() == nullptr) {
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
// Remove extensions and try again. // Remove extensions and try again.
icu::Locale no_extension_locale(icu_locale.getBaseName()); icu::Locale no_extension_locale(r.icu_locale.getBaseName());
icu_number_format.reset( icu_number_format.reset(
icu::NumberFormat::createInstance(no_extension_locale, status)); icu::NumberFormat::createInstance(no_extension_locale, status));
......
...@@ -151,12 +151,6 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize( ...@@ -151,12 +151,6 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize(
Intl::ResolveLocale(isolate, JSPluralRules::GetAvailableLocales(), Intl::ResolveLocale(isolate, JSPluralRules::GetAvailableLocales(),
requested_locales, matcher, {}); requested_locales, matcher, {});
// 18. Set collator.[[Locale]] to r.[[locale]].
icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus());
std::map<std::string, std::string> extensions = r.extensions;
// 12. Set pluralRules.[[Locale]] to the value of r.[[locale]]. // 12. Set pluralRules.[[Locale]] to the value of r.[[locale]].
Handle<String> locale_str = Handle<String> locale_str =
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str()); isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
...@@ -164,7 +158,7 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize( ...@@ -164,7 +158,7 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize(
std::unique_ptr<icu::PluralRules> icu_plural_rules; std::unique_ptr<icu::PluralRules> icu_plural_rules;
std::unique_ptr<icu::DecimalFormat> icu_decimal_format; std::unique_ptr<icu::DecimalFormat> icu_decimal_format;
InitializeICUPluralRules(isolate, icu_locale, type, &icu_plural_rules, InitializeICUPluralRules(isolate, r.icu_locale, type, &icu_plural_rules,
&icu_decimal_format); &icu_decimal_format);
CHECK_NOT_NULL(icu_plural_rules.get()); CHECK_NOT_NULL(icu_plural_rules.get());
CHECK_NOT_NULL(icu_decimal_format.get()); CHECK_NOT_NULL(icu_decimal_format.get());
......
...@@ -78,7 +78,7 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize( ...@@ -78,7 +78,7 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
// requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]). // requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
Intl::ResolvedLocale r = Intl::ResolvedLocale r =
Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(), Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
requested_locales, matcher, {}); requested_locales, matcher, {"lb"});
// 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", « // 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", «
// "strict", "normal", "loose" », "normal"). // "strict", "normal", "loose" », "normal").
......
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// "co" values that are expected to be dropped from the resolved locale.
const invalid_co = [
  "invalid",
  "search",
  "standard",
  "abce",
];

// Locales whose "co" value is expected to be kept in the resolved locale.
const valid_locales = [
  "zh-u-co-zhuyin",
  "zh-u-co-stroke",
  "ar-u-co-compat",
  "en-u-co-emoji",
  "en-u-co-eor",
  "zh-Hant-u-co-pinyin",
  "ko-u-co-searchjl",
  "ja-u-co-unihan",
];

for (const co of invalid_co) {
  const collator = new Intl.Collator(["en-u-co-" + co]);
  assertEquals("en", collator.resolvedOptions().locale);
}

// The trailing "-fo-obar" keyword is expected to be stripped.
for (const tag of valid_locales) {
  const collator = new Intl.Collator([tag + "-fo-obar"]);
  assertEquals(tag, collator.resolvedOptions().locale);
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// "kf" values that are expected to be dropped from the resolved locale.
const invalid_kf = [
  "invalid",
  "abce",
  "none",
  "true",
];

// "kf" values that are expected to be kept in the resolved locale.
const valid_kf = [
  "false",
  "upper",
  "lower",
];

// Base locales each valid value is checked against.
const locales = [
  "en",
  "fr",
];

for (const kf of invalid_kf) {
  const collator = new Intl.Collator(["en-u-kf-" + kf + "-fo-obar"]);
  assertEquals("en", collator.resolvedOptions().locale);
}

// The trailing "-fo-obar" keyword is expected to be stripped.
for (const kf of valid_kf) {
  for (const base of locales) {
    const tag = base + "-u-kf-" + kf;
    const collator = new Intl.Collator([tag + "-fo-obar"]);
    assertEquals(tag, collator.resolvedOptions().locale);
  }
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// "kn" values that are expected to be dropped from the resolved locale.
const invalid_kn = [
  "invalid",
  "search",
  "standard",
  "abce",
];

// Each entry: [requested tag, expected `numeric`, expected resolved locale].
const valid_kn = [
  ["en-u-kn", true, "en-u-kn"],
  ["en-u-kn-true", true, "en-u-kn"],
  ["en-u-kn-false", false, "en-u-kn-false"],
];

for (const kn of invalid_kn) {
  const collator = new Intl.Collator(["en-u-kn-" + kn]);
  assertEquals("en", collator.resolvedOptions().locale);
}

// The trailing "-fo-obar" keyword is expected to be stripped.
for (const [requested, expectedNumeric, expectedLocale] of valid_kn) {
  const collator = new Intl.Collator([requested + "-fo-obar"]);
  assertEquals(expectedNumeric, collator.resolvedOptions().numeric);
  assertEquals(expectedLocale, collator.resolvedOptions().locale);
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Values for the -u-ca- key that are expected to be removed from the
// resolved locale of an Intl.DateTimeFormat.
const invalid_ca = [
  "invalid",
  "abce",
];

// Calendar identifiers expected to be preserved; see
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
const valid_ca = [
  "buddhist",
  "chinese",
  "coptic",
  "dangi",
  "ethioaa",
  "ethiopic",
  "gregory",
  "hebrew",
  "indian",
  "islamic",
  "islamic-umalqura",
  "islamic-tbla",
  "islamic-civil",
  "islamic-rgsa",
  "iso8601",
  "japanese",
  "persian",
  "roc",
];

// Base locales combined with every valid -u-ca- value.
const locales = [
  "en",
  "ar",
];

// A rejected -u-ca- value (and the extra "-fo-obar") is dropped, leaving "en".
for (const ca of invalid_ca) {
  const requested = "en-u-ca-" + ca + "-fo-obar";
  const formatter = new Intl.DateTimeFormat([requested]);
  assertEquals("en", formatter.resolvedOptions().locale);
}

// An accepted -u-ca- value is kept for each base locale, while "-fo-obar"
// is not reflected in the resolved locale.
for (const ca of valid_ca) {
  for (const base of locales) {
    const expected = base + "-u-ca-" + ca;
    const formatter = new Intl.DateTimeFormat([expected + "-fo-obar"]);
    assertEquals(expected, formatter.resolvedOptions().locale);
  }
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Values for the -u-nu- key that are expected to be removed from the
// resolved locale of an Intl.DateTimeFormat.
const invalid_nu = [
  "invalid",
  "abce",
  "finance",
  "native",
  "traditio",
];

// Numbering systems expected to be preserved; see
// https://tc39.github.io/ecma402/#table-numbering-system-digits
const valid_nu = [
  "arab",
  "arabext",
  "bali",
  "beng",
  "deva",
  "fullwide",
  "gujr",
  "guru",
  "hanidec",
  "khmr",
  "knda",
  "laoo",
  "latn",
  "limb",
  "mlym",
  "mong",
  "mymr",
  "orya",
  "tamldec",
  "telu",
  "thai",
  "tibt",
];

// Base locales combined with every valid -u-nu- value.
const locales = [
  "en",
  "ar",
];

// A rejected -u-nu- value (and the extra "-fo-obar") is dropped, leaving "en".
for (const nu of invalid_nu) {
  const requested = "en-u-nu-" + nu + "-fo-obar";
  const formatter = new Intl.DateTimeFormat([requested]);
  assertEquals("en", formatter.resolvedOptions().locale);
}

// An accepted -u-nu- value is kept for each base locale, while "-fo-obar"
// is not reflected in the resolved locale.
for (const nu of valid_nu) {
  for (const base of locales) {
    const expected = base + "-u-nu-" + nu;
    const formatter = new Intl.DateTimeFormat([expected + "-fo-obar"]);
    assertEquals(expected, formatter.resolvedOptions().locale);
  }
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Values for the -u-nu- key that are expected to be removed from the
// resolved locale of an Intl.NumberFormat.
const invalid_nu = [
  "invalid",
  "abce",
  "finance",
  "native",
  "traditio",
];

// Numbering systems expected to be preserved; see
// https://tc39.github.io/ecma402/#table-numbering-system-digits
const valid_nu = [
  "arab",
  "arabext",
  "bali",
  "beng",
  "deva",
  "fullwide",
  "gujr",
  "guru",
  "hanidec",
  "khmr",
  "knda",
  "laoo",
  "latn",
  "limb",
  "mlym",
  "mong",
  "mymr",
  "orya",
  "tamldec",
  "telu",
  "thai",
  "tibt",
];

// Base locales combined with every valid -u-nu- value.
const locales = [
  "en",
  "ar",
];

// A rejected -u-nu- value (and the extra "-fo-obar") is dropped, leaving "en".
for (const nu of invalid_nu) {
  const requested = "en-u-nu-" + nu + "-fo-obar";
  const formatter = new Intl.NumberFormat([requested]);
  assertEquals("en", formatter.resolvedOptions().locale);
}

// An accepted -u-nu- value is kept for each base locale, while "-fo-obar"
// is not reflected in the resolved locale.
for (const nu of valid_nu) {
  for (const base of locales) {
    const expected = base + "-u-nu-" + nu;
    const formatter = new Intl.NumberFormat([expected + "-fo-obar"]);
    assertEquals(expected, formatter.resolvedOptions().locale);
  }
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Locale tags carrying a valid -u-hc- value: the resolved locale of an
// Intl.DateTimeFormat must equal the requested tag.
const preserved_hc_tags = [
  "en-u-hc-h11-nu-arab",
  "en-u-hc-h12-nu-arab",
  "en-u-hc-h23-nu-arab",
  "en-u-hc-h24-nu-arab",
];
for (const tag of preserved_hc_tags) {
  const resolved = new Intl.DateTimeFormat([tag]).resolvedOptions().locale;
  assertEquals(tag, resolved);
}

// https://tc39.github.io/ecma402/#sec-intl.datetimeformat-internal-slots
// [[LocaleData]][locale].hc must be « null, "h11", "h12", "h23", "h24" » for
// all locale values, so any other hc value has to be removed while the
// valid -nu-arab part of the tag stays.
const stripped_hc_tags = [
  "en-u-hc-h10-nu-arab",
  "en-u-hc-h13-nu-arab",
  "en-u-hc-h22-nu-arab",
  "en-u-hc-h25-nu-arab",
];
for (const tag of stripped_hc_tags) {
  const resolved = new Intl.DateTimeFormat([tag]).resolvedOptions().locale;
  assertEquals("en-u-nu-arab", resolved);
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Values for the -u-lb- key that are expected to be removed from the
// resolved locale of an Intl.Segmenter.
const invalid_lb = [
  "invalid",
  "abce",
  "breakall",
  "keepall",
  "none",
  "standard",
];

// Values for the -u-lb- key that are expected to be preserved.
const valid_lb = [
  "strict",
  "normal",
  "loose",
];

// Base locales combined with every valid -u-lb- value.
const locales = [
  "en",
  "ja",
  "zh",
];

// A rejected -u-lb- value (and the extra "-fo-obar") is dropped, leaving "en".
for (const lb of invalid_lb) {
  const requested = "en-u-lb-" + lb + "-fo-obar";
  const segmenter = new Intl.Segmenter([requested]);
  assertEquals("en", segmenter.resolvedOptions().locale);
}

// An accepted -u-lb- value is kept for each base locale, while "-fo-obar"
// is not reflected in the resolved locale.
for (const lb of valid_lb) {
  for (const base of locales) {
    const expected = base + "-u-lb-" + lb;
    const segmenter = new Intl.Segmenter([expected + "-fo-obar"]);
    assertEquals(expected, segmenter.resolvedOptions().locale);
  }
}
...@@ -4,10 +4,7 @@ ...@@ -4,10 +4,7 @@
// Environment Variables: LC_ALL=pt-BR.UTF8 // Environment Variables: LC_ALL=pt-BR.UTF8
// The data files packaged with d8 currently have Brazilian Portuguese
// DateTimeFormat but not Collation
if (this.Intl) { if (this.Intl) {
assertEquals('pt', Intl.Collator().resolvedOptions().locale); assertEquals('pt-BR', Intl.Collator().resolvedOptions().locale);
assertEquals('pt-BR', Intl.DateTimeFormat().resolvedOptions().locale); assertEquals('pt-BR', Intl.DateTimeFormat().resolvedOptions().locale);
} }
...@@ -586,8 +586,7 @@ ...@@ -586,8 +586,7 @@
'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL], 'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL],
'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL], 'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7481 # https://bugs.chromium.org/p/v8/issues/detail?id=8469
'intl402/NumberFormat/ignore-invalid-unicode-ext-values': [FAIL],
'intl402/DateTimeFormat/ignore-invalid-unicode-ext-values': [FAIL], 'intl402/DateTimeFormat/ignore-invalid-unicode-ext-values': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7482 # https://bugs.chromium.org/p/v8/issues/detail?id=7482
...@@ -604,7 +603,9 @@ ...@@ -604,7 +603,9 @@
# https://crbug.com/v8/7808 # https://crbug.com/v8/7808
'intl402/String/prototype/localeCompare/returns-same-results-as-Collator': [SKIP], 'intl402/String/prototype/localeCompare/returns-same-results-as-Collator': [SKIP],
'intl402/Collator/prototype/compare/bound-to-collator-instance': [SKIP], 'intl402/Collator/prototype/compare/bound-to-collator-instance': [SKIP],
'intl402/Collator/ignore-invalid-unicode-ext-values': [SKIP],
# https://github.com/tc39/ecma402/issues/223
'intl402/Collator/missing-unicode-ext-value-defaults-to-true': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=8260 # https://bugs.chromium.org/p/v8/issues/detail?id=8260
'intl402/Locale/constructor-non-iana-canon': [FAIL], 'intl402/Locale/constructor-non-iana-canon': [FAIL],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment