Commit 5182e1ad authored by Frank Tang, committed by Commit Bot

Revert "[Intl] Validate u extension type"

This reverts commit 8d094249.

Reason for revert: Causes a memory regression

TBR=gsathya@chromium.org

Original change's description:
> [Intl] Validate u extension type
>
> Fix intl402/*/ignore-invalid-unicode-ext-values
> Add tests for other valid/invalid -u- ext values.
>
> Bug: v8:7481
> Change-Id: I429effd071bb03599a1e767bb2a9e9918a91b850
> Reviewed-on: https://chromium-review.googlesource.com/c/1351307
> Commit-Queue: Frank Tang <ftang@chromium.org>
> Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#58173}

TBR=cira@chromium.org,jshin@chromium.org,gsathya@chromium.org,ftang@chromium.org

# Not skipping CQ checks because original CL landed > 1 day ago.

Bug: v8:7481
Change-Id: Idabf2183684f264271ebe2b8c0ca10ae8e30d811
Reviewed-on: https://chromium-review.googlesource.com/c/1393499
Reviewed-by: Frank Tang <ftang@chromium.org>
Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org>
Commit-Queue: Frank Tang <ftang@chromium.org>
Cr-Commit-Position: refs/heads/master@{#58506}
parent 707bfeca
...@@ -2059,7 +2059,6 @@ void JSCollator::JSCollatorVerify(Isolate* isolate) { ...@@ -2059,7 +2059,6 @@ void JSCollator::JSCollatorVerify(Isolate* isolate) {
JSObjectVerify(isolate); JSObjectVerify(isolate);
VerifyObjectField(isolate, kICUCollatorOffset); VerifyObjectField(isolate, kICUCollatorOffset);
VerifyObjectField(isolate, kBoundCompareOffset); VerifyObjectField(isolate, kBoundCompareOffset);
VerifyObjectField(isolate, kLocaleOffset);
} }
void JSDateTimeFormat::JSDateTimeFormatVerify(Isolate* isolate) { void JSDateTimeFormat::JSDateTimeFormatVerify(Isolate* isolate) {
......
...@@ -2035,7 +2035,6 @@ void JSCollator::JSCollatorPrint(std::ostream& os) { // NOLINT ...@@ -2035,7 +2035,6 @@ void JSCollator::JSCollatorPrint(std::ostream& os) { // NOLINT
JSObjectPrintHeader(os, *this, "JSCollator"); JSObjectPrintHeader(os, *this, "JSCollator");
os << "\n - icu collator: " << Brief(icu_collator()); os << "\n - icu collator: " << Brief(icu_collator());
os << "\n - bound compare: " << Brief(bound_compare()); os << "\n - bound compare: " << Brief(bound_compare());
os << "\n - locale: " << Brief(locale());
JSObjectPrintBody(os, *this); JSObjectPrintBody(os, *this);
} }
......
...@@ -26,7 +26,6 @@ ...@@ -26,7 +26,6 @@
#include "src/string-case.h" #include "src/string-case.h"
#include "unicode/basictz.h" #include "unicode/basictz.h"
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/calendar.h"
#include "unicode/coll.h" #include "unicode/coll.h"
#include "unicode/decimfmt.h" #include "unicode/decimfmt.h"
#include "unicode/locid.h" #include "unicode/locid.h"
...@@ -500,12 +499,24 @@ bool RemoveLocaleScriptTag(const std::string& icu_locale, ...@@ -500,12 +499,24 @@ bool RemoveLocaleScriptTag(const std::string& icu_locale,
std::set<std::string> Intl::BuildLocaleSet( std::set<std::string> Intl::BuildLocaleSet(
const icu::Locale* icu_available_locales, int32_t count) { const icu::Locale* icu_available_locales, int32_t count) {
std::set<std::string> locales; std::set<std::string> locales;
UErrorCode error = U_ZERO_ERROR;
char result[ULOC_FULLNAME_CAPACITY];
for (int32_t i = 0; i < count; ++i) { for (int32_t i = 0; i < count; ++i) {
std::string locale = Intl::ToLanguageTag(icu_available_locales[i]); const char* icu_name = icu_available_locales[i].getName();
error = U_ZERO_ERROR;
// No need to force strict BCP47 rules.
uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
// This shouldn't happen, but let's not break the user.
continue;
}
std::string locale(result);
locales.insert(locale); locales.insert(locale);
std::string shortened_locale; std::string shortened_locale;
if (RemoveLocaleScriptTag(locale, &shortened_locale)) { if (RemoveLocaleScriptTag(icu_name, &shortened_locale)) {
std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-'); std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
locales.insert(shortened_locale); locales.insert(shortened_locale);
} }
...@@ -544,9 +555,13 @@ std::string DefaultLocale(Isolate* isolate) { ...@@ -544,9 +555,13 @@ std::string DefaultLocale(Isolate* isolate) {
isolate->set_default_locale("en-US"); isolate->set_default_locale("en-US");
} else { } else {
// Set the locale // Set the locale
isolate->set_default_locale(default_locale.isBogus() char result[ULOC_FULLNAME_CAPACITY];
? "und" UErrorCode status = U_ZERO_ERROR;
: Intl::ToLanguageTag(default_locale)); int32_t length =
uloc_toLanguageTag(default_locale.getName(), result,
ULOC_FULLNAME_CAPACITY, FALSE, &status);
isolate->set_default_locale(
U_SUCCESS(status) ? std::string(result, length) : "und");
} }
DCHECK(!isolate->default_locale().empty()); DCHECK(!isolate->default_locale().empty());
} }
...@@ -752,12 +767,29 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate, ...@@ -752,12 +767,29 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
// propose to Ecma 402 to put a limit on the locale length or change ICU to // propose to Ecma 402 to put a limit on the locale length or change ICU to
// handle long locale names better. See // handle long locale names better. See
// https://unicode-org.atlassian.net/browse/ICU-13417 // https://unicode-org.atlassian.net/browse/ICU-13417
// forLanguageTag checks the structrual validity. If the input BCP47 UErrorCode error = U_ZERO_ERROR;
char icu_result[ULOC_FULLNAME_CAPACITY];
// uloc_forLanguageTag checks the structural validity. If the input BCP47
// language tag is parsed all the way to the end, it indicates that the input // language tag is parsed all the way to the end, it indicates that the input
// is structurally valid. Due to a couple of bugs, we can't use it // is structurally valid. Due to a couple of bugs, we can't use it
// without Chromium patches or ICU 62 or earlier. // without Chromium patches or ICU 62 or earlier.
UErrorCode error = U_ZERO_ERROR; int parsed_length;
icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error); uloc_forLanguageTag(locale.c_str(), icu_result, ULOC_FULLNAME_CAPACITY,
&parsed_length, &error);
if (U_FAILURE(error) ||
static_cast<size_t>(parsed_length) < locale.length() ||
error == U_STRING_NOT_TERMINATED_WARNING) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
Nothing<std::string>());
}
// Force strict BCP47 rules.
char result[ULOC_FULLNAME_CAPACITY];
int32_t result_len = uloc_toLanguageTag(icu_result, result,
ULOC_FULLNAME_CAPACITY, TRUE, &error);
if (U_FAILURE(error)) { if (U_FAILURE(error)) {
THROW_NEW_ERROR_RETURN_VALUE( THROW_NEW_ERROR_RETURN_VALUE(
isolate, isolate,
...@@ -765,7 +797,7 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate, ...@@ -765,7 +797,7 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
Nothing<std::string>()); Nothing<std::string>());
} }
return Just(Intl::ToLanguageTag(icu_locale)); return Just(std::string(result, result_len));
} }
Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList( Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
...@@ -1432,44 +1464,6 @@ MaybeHandle<JSObject> Intl::SupportedLocalesOf( ...@@ -1432,44 +1464,6 @@ MaybeHandle<JSObject> Intl::SupportedLocalesOf(
} }
namespace { namespace {
// Returns true when |value|, converted to its legacy ICU spelling with
// uloc_toLegacyType(), matches one of the entries enumerated by
// T::getKeywordValuesForLocale() for |key| and the base name of |locale|.
//
// Returns false when the enumeration cannot be obtained, when |value| has no
// legacy mapping, or when no enumerated entry matches.
template <typename T>
bool IsValidExtension(const icu::Locale& locale, const char* key,
                      const std::string& value) {
  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::StringEnumeration> enumeration(
      T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
                                   false, status));
  if (U_FAILURE(status) || enumeration == nullptr) return false;

  // uloc_toLegacyType() returns nullptr for a type it cannot map. Building a
  // std::string from nullptr is undefined behavior, so reject that case
  // before constructing the string.
  const char* raw_legacy_type = uloc_toLegacyType(key, value.c_str());
  if (raw_legacy_type == nullptr) return false;
  const std::string legacy_type(raw_legacy_type);

  int32_t length;
  for (const char* item = enumeration->next(&length, status); item != nullptr;
       item = enumeration->next(&length, status)) {
    if (U_SUCCESS(status) && legacy_type == item) {
      return true;
    }
  }
  return false;
}
// Checks |value| as a "calendar" keyword value for |locale| by delegating to
// IsValidExtension() instantiated with icu::Calendar.
bool IsValidCalendar(const icu::Locale& locale, const std::string& value) {
  const char* const calendar_key = "calendar";
  const bool is_known_calendar =
      IsValidExtension<icu::Calendar>(locale, calendar_key, value);
  return is_known_calendar;
}
// Returns true when |value| is acceptable as a "collation" keyword value for
// |locale|. "standard" and "search" are always rejected; every other value is
// checked through IsValidExtension() instantiated with icu::Collator.
bool IsValidCollation(const icu::Locale& locale, const std::string& value) {
  // Compare directly instead of building a std::set on every call just to
  // test membership in two constants.
  if (value == "standard" || value == "search") return false;
  return IsValidExtension<icu::Collator>(locale, "collation", value);
}
// Returns true when |value| names a numbering system that
// icu::NumberingSystem::createInstanceByName() can instantiate. "native",
// "traditio" and "finance" are always rejected before ICU is consulted.
bool IsValidNumberingSystem(const std::string& value) {
  // Compare directly instead of building a std::set on every call just to
  // test membership in three constants.
  if (value == "native" || value == "traditio" || value == "finance") {
    return false;
  }
  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::NumberingSystem> numbering_system(
      icu::NumberingSystem::createInstanceByName(value.c_str(), status));
  return U_SUCCESS(status) && numbering_system.get() != nullptr;
}
std::map<std::string, std::string> LookupAndValidateUnicodeExtensions( std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) { icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
...@@ -1509,41 +1503,14 @@ std::map<std::string, std::string> LookupAndValidateUnicodeExtensions( ...@@ -1509,41 +1503,14 @@ std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) { if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value); const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
bool is_valid_value = false;
// 8.h.ii.1.a If keyLocaleData contains requestedValue, then
if (strcmp("ca", bcp47_key) == 0) {
is_valid_value = IsValidCalendar(*icu_locale, bcp47_value);
} else if (strcmp("co", bcp47_key) == 0) {
is_valid_value = IsValidCollation(*icu_locale, bcp47_value);
} else if (strcmp("hc", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("lb", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
std::set<std::string> valid_values = {"strict", "normal", "loose"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("kn", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
std::set<std::string> valid_values = {"true", "false"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("kf", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
std::set<std::string> valid_values = {"upper", "lower", "false"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("nu", bcp47_key) == 0) {
is_valid_value = IsValidNumberingSystem(bcp47_value);
}
if (is_valid_value) {
extensions.insert( extensions.insert(
std::pair<std::string, std::string>(bcp47_key, bcp47_value)); std::pair<std::string, std::string>(bcp47_key, bcp47_value));
continue; } else {
}
}
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
icu_locale->setKeywordValue(keyword, nullptr, status); icu_locale->setKeywordValue(keyword, nullptr, status);
CHECK(U_SUCCESS(status)); CHECK(U_SUCCESS(status));
} }
}
return extensions; return extensions;
} }
...@@ -1624,7 +1591,11 @@ Intl::ResolvedLocale Intl::ResolveLocale( ...@@ -1624,7 +1591,11 @@ Intl::ResolvedLocale Intl::ResolveLocale(
std::map<std::string, std::string> extensions = std::map<std::string, std::string> extensions =
LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys); LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale); char canonicalized_locale[ULOC_FULLNAME_CAPACITY];
UErrorCode status = U_ZERO_ERROR;
uloc_toLanguageTag(icu_locale.getName(), canonicalized_locale,
ULOC_FULLNAME_CAPACITY, true, &status);
CHECK(U_SUCCESS(status));
// TODO(gsathya): Remove privateuse subtags from extensions. // TODO(gsathya): Remove privateuse subtags from extensions.
......
...@@ -22,7 +22,6 @@ OBJECT_CONSTRUCTORS_IMPL(JSCollator, JSObject) ...@@ -22,7 +22,6 @@ OBJECT_CONSTRUCTORS_IMPL(JSCollator, JSObject)
ACCESSORS2(JSCollator, icu_collator, Managed<icu::Collator>, kICUCollatorOffset) ACCESSORS2(JSCollator, icu_collator, Managed<icu::Collator>, kICUCollatorOffset)
ACCESSORS(JSCollator, bound_compare, Object, kBoundCompareOffset); ACCESSORS(JSCollator, bound_compare, Object, kBoundCompareOffset);
ACCESSORS2(JSCollator, locale, String, kLocaleOffset)
CAST_ACCESSOR2(JSCollator); CAST_ACCESSOR2(JSCollator);
......
...@@ -60,6 +60,13 @@ void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options, ...@@ -60,6 +60,13 @@ void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
.FromJust()); .FromJust());
} }
// Writes the language tag that uloc_toLanguageTag() produces for |locale|
// (non-strict mode) into |tag|. The call is told the buffer holds
// ULOC_FULLNAME_CAPACITY chars, so |tag| must be at least that large.
// Aborts via CHECK when the conversion fails or the output was not
// NUL-terminated.
void toLanguageTag(const icu::Locale& locale, char* tag) {
  UErrorCode status = U_ZERO_ERROR;
  uloc_toLanguageTag(locale.getName(), tag, ULOC_FULLNAME_CAPACITY, FALSE,
                     &status);
  // U_SUCCESS() also accepts warnings, including
  // U_STRING_NOT_TERMINATED_WARNING, which means |tag| was filled without a
  // trailing NUL. The buffer is handed onward as a plain char* (presumably
  // read as a C string -- confirm against callers), so reject that case
  // explicitly, matching the other uloc_toLanguageTag() call sites.
  CHECK(U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING);
}
} // anonymous namespace } // anonymous namespace
// static // static
...@@ -128,8 +135,6 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate, ...@@ -128,8 +135,6 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
Handle<String> locale = Handle<String>(collator->locale(), isolate);
icu::Locale icu_locale(icu_collator->getLocale(ULOC_VALID_LOCALE, status)); icu::Locale icu_locale(icu_collator->getLocale(ULOC_VALID_LOCALE, status));
CHECK(U_SUCCESS(status)); CHECK(U_SUCCESS(status));
...@@ -139,6 +144,7 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate, ...@@ -139,6 +144,7 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
const char* legacy_collation_key = uloc_toLegacyKey(collation_key); const char* legacy_collation_key = uloc_toLegacyKey(collation_key);
DCHECK_NOT_NULL(legacy_collation_key); DCHECK_NOT_NULL(legacy_collation_key);
char bcp47_locale_tag[ULOC_FULLNAME_CAPACITY];
char legacy_collation_value[ULOC_FULLNAME_CAPACITY]; char legacy_collation_value[ULOC_FULLNAME_CAPACITY];
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
int32_t length = int32_t length =
...@@ -146,13 +152,37 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate, ...@@ -146,13 +152,37 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
ULOC_FULLNAME_CAPACITY, status); ULOC_FULLNAME_CAPACITY, status);
if (length > 0 && U_SUCCESS(status)) { if (length > 0 && U_SUCCESS(status)) {
collation = uloc_toUnicodeLocaleType(collation_key, legacy_collation_value); const char* collation_value =
CHECK_NOT_NULL(collation); uloc_toUnicodeLocaleType(collation_key, legacy_collation_value);
CHECK_NOT_NULL(collation_value);
if (strcmp(collation, "search") == 0) { if (strcmp(collation_value, "search") == 0) {
usage = "search"; usage = "search";
// Search is disallowed as a collation value per spec. Let's
// use `default`, instead.
//
// https://tc39.github.io/ecma402/#sec-properties-of-intl-collator-instances
collation = "default"; collation = "default";
// We clone the icu::Locale because we don't want the
// icu_collator to be affected when we remove the collation key
// below.
icu::Locale new_icu_locale = icu_locale;
// The spec forbids the search as a collation value in the
// locale tag, so let's filter it out.
status = U_ZERO_ERROR;
new_icu_locale.setKeywordValue(legacy_collation_key, nullptr, status);
CHECK(U_SUCCESS(status));
toLanguageTag(new_icu_locale, bcp47_locale_tag);
} else {
collation = collation_value;
toLanguageTag(icu_locale, bcp47_locale_tag);
} }
} else {
toLanguageTag(icu_locale, bcp47_locale_tag);
} }
// 5. For each row of Table 2, except the header row, in table order, do // 5. For each row of Table 2, except the header row, in table order, do
...@@ -166,11 +196,8 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate, ...@@ -166,11 +196,8 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
// [[Collation]] "collation" // [[Collation]] "collation"
// [[Numeric]] "numeric" kn // [[Numeric]] "numeric" kn
// [[CaseFirst]] "caseFirst" kf // [[CaseFirst]] "caseFirst" kf
CHECK(JSReceiver::CreateDataProperty(isolate, options, CreateDataPropertyForOptions(
isolate->factory()->locale_string(), isolate, options, isolate->factory()->locale_string(), bcp47_locale_tag);
locale, kDontThrow)
.FromJust());
CreateDataPropertyForOptions(isolate, options, CreateDataPropertyForOptions(isolate, options,
isolate->factory()->usage_string(), usage); isolate->factory()->usage_string(), usage);
CreateDataPropertyForOptions( CreateDataPropertyForOptions(
...@@ -208,14 +235,6 @@ UColAttributeValue ToUColAttributeValue(Intl::CaseFirst case_first) { ...@@ -208,14 +235,6 @@ UColAttributeValue ToUColAttributeValue(Intl::CaseFirst case_first) {
} }
} }
// Sets the UCOL_NUMERIC_COLLATION attribute of |icu_collator| to UCOL_ON when
// |numeric| is true and to UCOL_OFF otherwise. |icu_collator| must not be
// null, and the ICU call is CHECKed to succeed.
void SetNumericOption(icu::Collator* icu_collator, bool numeric) {
  CHECK_NOT_NULL(icu_collator);
  const UColAttributeValue numeric_setting = numeric ? UCOL_ON : UCOL_OFF;
  UErrorCode status = U_ZERO_ERROR;
  icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, numeric_setting, status);
  CHECK(U_SUCCESS(status));
}
void SetCaseFirstOption(icu::Collator* icu_collator, void SetCaseFirstOption(icu::Collator* icu_collator,
Intl::CaseFirst case_first) { Intl::CaseFirst case_first) {
CHECK_NOT_NULL(icu_collator); CHECK_NOT_NULL(icu_collator);
...@@ -306,14 +325,32 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate, ...@@ -306,14 +325,32 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
requested_locales, matcher, relevant_extension_keys); requested_locales, matcher, relevant_extension_keys);
// 18. Set collator.[[Locale]] to r.[[locale]]. // 18. Set collator.[[Locale]] to r.[[locale]].
Handle<String> locale_str =
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
collator->set_locale(*locale_str);
icu::Locale icu_locale = r.icu_locale; icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus()); DCHECK(!icu_locale.isBogus());
std::map<std::string, std::string> extensions = r.extensions;
// 19. Let collation be r.[[co]]. // 19. Let collation be r.[[co]].
//
// r.[[co]] is already set as part of the icu::Locale creation as
// icu parses unicode extensions and sets the keywords.
//
// We need to sanitize the keywords based on certain ECMAScript rules.
//
// As per https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots:
// The values "standard" and "search" must not be used as elements
// in any [[SortLocaleData]][locale].co and
// [[SearchLocaleData]][locale].co list.
auto co_extension_it = extensions.find("co");
if (co_extension_it != extensions.end()) {
const std::string& value = co_extension_it->second;
if ((value == "search") || (value == "standard")) {
UErrorCode status = U_ZERO_ERROR;
const char* key = uloc_toLegacyKey("co");
icu_locale.setKeywordValue(key, nullptr, status);
CHECK(U_SUCCESS(status));
}
}
// 5. Set collator.[[Usage]] to usage. // 5. Set collator.[[Usage]] to usage.
// //
...@@ -373,11 +410,19 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate, ...@@ -373,11 +410,19 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
// passed in through the unicode extensions. // passed in through the unicode extensions.
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
if (found_numeric.FromJust()) { if (found_numeric.FromJust()) {
SetNumericOption(icu_collator.get(), numeric); icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
numeric ? UCOL_ON : UCOL_OFF, status);
CHECK(U_SUCCESS(status));
} else { } else {
auto kn_extension_it = r.extensions.find("kn"); auto kn_extension_it = extensions.find("kn");
if (kn_extension_it != r.extensions.end()) { if (kn_extension_it != extensions.end()) {
SetNumericOption(icu_collator.get(), (kn_extension_it->second == "true")); const std::string& value = kn_extension_it->second;
numeric = (value == "true");
icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
numeric ? UCOL_ON : UCOL_OFF, status);
CHECK(U_SUCCESS(status));
} }
} }
...@@ -390,10 +435,10 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate, ...@@ -390,10 +435,10 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
if (case_first != Intl::CaseFirst::kUndefined) { if (case_first != Intl::CaseFirst::kUndefined) {
SetCaseFirstOption(icu_collator.get(), case_first); SetCaseFirstOption(icu_collator.get(), case_first);
} else { } else {
auto kf_extension_it = r.extensions.find("kf"); auto kf_extension_it = extensions.find("kf");
if (kf_extension_it != r.extensions.end()) { if (kf_extension_it != extensions.end()) {
SetCaseFirstOption(icu_collator.get(), const std::string& value = kf_extension_it->second;
ToCaseFirst(kf_extension_it->second.c_str())); SetCaseFirstOption(icu_collator.get(), ToCaseFirst(value.c_str()));
} }
} }
......
...@@ -50,7 +50,6 @@ class JSCollator : public JSObject { ...@@ -50,7 +50,6 @@ class JSCollator : public JSObject {
#define JS_COLLATOR_FIELDS(V) \ #define JS_COLLATOR_FIELDS(V) \
V(kICUCollatorOffset, kTaggedSize) \ V(kICUCollatorOffset, kTaggedSize) \
V(kBoundCompareOffset, kTaggedSize) \ V(kBoundCompareOffset, kTaggedSize) \
V(kLocaleOffset, kTaggedSize) \
/* Total size. */ \ /* Total size. */ \
V(kSize, 0) V(kSize, 0)
...@@ -59,7 +58,6 @@ class JSCollator : public JSObject { ...@@ -59,7 +58,6 @@ class JSCollator : public JSObject {
DECL_ACCESSORS2(icu_collator, Managed<icu::Collator>) DECL_ACCESSORS2(icu_collator, Managed<icu::Collator>)
DECL_ACCESSORS(bound_compare, Object); DECL_ACCESSORS(bound_compare, Object);
DECL_ACCESSORS2(locale, String)
OBJECT_CONSTRUCTORS(JSCollator, JSObject); OBJECT_CONSTRUCTORS(JSCollator, JSObject);
}; };
......
...@@ -863,7 +863,10 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize( ...@@ -863,7 +863,10 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize(
// ecma402/#sec-intl.datetimeformat-internal-slots // ecma402/#sec-intl.datetimeformat-internal-slots
// The value of the [[RelevantExtensionKeys]] internal slot is // The value of the [[RelevantExtensionKeys]] internal slot is
// « "ca", "nu", "hc" ». // « "ca", "nu", "hc" ».
std::set<std::string> relevant_extension_keys = {"nu", "ca", "hc"}; //
// TODO(ftang): Add "hc" to this list of keys:
// https://bugs.chromium.org/p/v8/issues/detail?id=7482
std::set<std::string> relevant_extension_keys = {"nu", "ca"};
// 10. Let localeData be %DateTimeFormat%.[[LocaleData]]. // 10. Let localeData be %DateTimeFormat%.[[LocaleData]].
// 11. Let r be ResolveLocale( %DateTimeFormat%.[[AvailableLocales]], // 11. Let r be ResolveLocale( %DateTimeFormat%.[[AvailableLocales]],
...@@ -874,6 +877,8 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize( ...@@ -874,6 +877,8 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize(
isolate, JSDateTimeFormat::GetAvailableLocales(), requested_locales, isolate, JSDateTimeFormat::GetAvailableLocales(), requested_locales,
locale_matcher, relevant_extension_keys); locale_matcher, relevant_extension_keys);
// TODO(ftang): Make sure that "nu" key doesn't have "native",
// "traditio" or "finance" values.
icu::Locale icu_locale = r.icu_locale; icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus()); DCHECK(!icu_locale.isBogus());
......
...@@ -263,6 +263,26 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize( ...@@ -263,6 +263,26 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize(
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str()); isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
number_format->set_locale(*locale_str); number_format->set_locale(*locale_str);
icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus());
std::map<std::string, std::string> extensions = r.extensions;
// The list that is the value of the "nu" field of any locale field of
// [[LocaleData]] must not include the values "native", "traditio", or
// "finance".
//
// See https://tc39.github.io/ecma402/#sec-intl.numberformat-internal-slots
if (extensions.find("nu") != extensions.end()) {
const std::string value = extensions.at("nu");
if (value == "native" || value == "traditio" || value == "finance") {
// 10. Set numberFormat.[[NumberingSystem]] to r.[[nu]].
UErrorCode status = U_ZERO_ERROR;
icu_locale.setKeywordValue("nu", nullptr, status);
CHECK(U_SUCCESS(status));
}
}
// 11. Let dataLocale be r.[[dataLocale]]. // 11. Let dataLocale be r.[[dataLocale]].
// //
// 12. Let style be ? GetOption(options, "style", "string", « "decimal", // 12. Let style be ? GetOption(options, "style", "string", « "decimal",
...@@ -336,20 +356,20 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize( ...@@ -336,20 +356,20 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize(
std::unique_ptr<icu::NumberFormat> icu_number_format; std::unique_ptr<icu::NumberFormat> icu_number_format;
if (style == Style::DECIMAL) { if (style == Style::DECIMAL) {
icu_number_format.reset( icu_number_format.reset(
icu::NumberFormat::createInstance(r.icu_locale, status)); icu::NumberFormat::createInstance(icu_locale, status));
} else if (style == Style::PERCENT) { } else if (style == Style::PERCENT) {
icu_number_format.reset( icu_number_format.reset(
icu::NumberFormat::createPercentInstance(r.icu_locale, status)); icu::NumberFormat::createPercentInstance(icu_locale, status));
} else { } else {
DCHECK_EQ(style, Style::CURRENCY); DCHECK_EQ(style, Style::CURRENCY);
icu_number_format.reset( icu_number_format.reset(
icu::NumberFormat::createInstance(r.icu_locale, format_style, status)); icu::NumberFormat::createInstance(icu_locale, format_style, status));
} }
if (U_FAILURE(status) || icu_number_format.get() == nullptr) { if (U_FAILURE(status) || icu_number_format.get() == nullptr) {
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
// Remove extensions and try again. // Remove extensions and try again.
icu::Locale no_extension_locale(r.icu_locale.getBaseName()); icu::Locale no_extension_locale(icu_locale.getBaseName());
icu_number_format.reset( icu_number_format.reset(
icu::NumberFormat::createInstance(no_extension_locale, status)); icu::NumberFormat::createInstance(no_extension_locale, status));
......
...@@ -151,6 +151,12 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize( ...@@ -151,6 +151,12 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize(
Intl::ResolveLocale(isolate, JSPluralRules::GetAvailableLocales(), Intl::ResolveLocale(isolate, JSPluralRules::GetAvailableLocales(),
requested_locales, matcher, {}); requested_locales, matcher, {});
// 18. Set collator.[[Locale]] to r.[[locale]].
icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus());
std::map<std::string, std::string> extensions = r.extensions;
// 12. Set pluralRules.[[Locale]] to the value of r.[[locale]]. // 12. Set pluralRules.[[Locale]] to the value of r.[[locale]].
Handle<String> locale_str = Handle<String> locale_str =
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str()); isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
...@@ -158,7 +164,7 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize( ...@@ -158,7 +164,7 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize(
std::unique_ptr<icu::PluralRules> icu_plural_rules; std::unique_ptr<icu::PluralRules> icu_plural_rules;
std::unique_ptr<icu::DecimalFormat> icu_decimal_format; std::unique_ptr<icu::DecimalFormat> icu_decimal_format;
InitializeICUPluralRules(isolate, r.icu_locale, type, &icu_plural_rules, InitializeICUPluralRules(isolate, icu_locale, type, &icu_plural_rules,
&icu_decimal_format); &icu_decimal_format);
CHECK_NOT_NULL(icu_plural_rules.get()); CHECK_NOT_NULL(icu_plural_rules.get());
CHECK_NOT_NULL(icu_decimal_format.get()); CHECK_NOT_NULL(icu_decimal_format.get());
......
...@@ -78,7 +78,7 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize( ...@@ -78,7 +78,7 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
// requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]). // requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
Intl::ResolvedLocale r = Intl::ResolvedLocale r =
Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(), Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
requested_locales, matcher, {"lb"}); requested_locales, matcher, {});
// 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", « // 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", «
// "strict", "normal", "loose" », "normal"). // "strict", "normal", "loose" », "normal").
......
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// "co" values that are expected to be dropped from the resolved locale.
const invalid_co = ["invalid", "search", "standard", "abce"];

// Locales whose "co" value is expected to survive in the resolved locale.
const valid_locales = [
  "zh-u-co-zhuyin",
  "zh-u-co-stroke",
  "ar-u-co-compat",
  "en-u-co-emoji",
  "en-u-co-eor",
  "zh-Hant-u-co-pinyin",
  "ko-u-co-searchjl",
  "ja-u-co-unihan",
];

// An invalid "co" value must resolve to the bare "en" locale.
for (const co of invalid_co) {
  const collator = new Intl.Collator(["en-u-co-" + co]);
  assertEquals("en", collator.resolvedOptions().locale);
}

// A valid "co" value is kept; the extra "-fo-obar" keyword appended to the
// request is expected to be absent from the resolved locale.
for (const tag of valid_locales) {
  const collator = new Intl.Collator([tag + "-fo-obar"]);
  assertEquals(tag, collator.resolvedOptions().locale);
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// "kf" values that are expected to be dropped from the resolved locale.
const invalid_kf = ["invalid", "abce", "none", "true"];

// "kf" values that are expected to survive in the resolved locale.
const valid_kf = ["false", "upper", "lower"];

// Base locales each valid value is combined with.
const locales = ["en", "fr"];

// An invalid "kf" value must resolve to the bare "en" locale.
for (const kf of invalid_kf) {
  const collator = new Intl.Collator(["en-u-kf-" + kf + "-fo-obar"]);
  assertEquals("en", collator.resolvedOptions().locale);
}

// A valid "kf" value is kept; the extra "-fo-obar" keyword appended to the
// request is expected to be absent from the resolved locale.
for (const kf of valid_kf) {
  for (const base of locales) {
    const expected = base + "-u-kf-" + kf;
    const collator = new Intl.Collator([expected + "-fo-obar"]);
    assertEquals(expected, collator.resolvedOptions().locale);
  }
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// "kn" values that are expected to be dropped from the resolved locale.
const invalid_kn = ["invalid", "search", "standard", "abce"];

// Each entry is [requested locale, expected numeric, expected resolved locale].
const valid_kn = [
  ["en-u-kn", true, "en-u-kn"],
  ["en-u-kn-true", true, "en-u-kn"],
  ["en-u-kn-false", false, "en-u-kn-false"],
];

// An invalid "kn" value must resolve to the bare "en" locale.
for (const kn of invalid_kn) {
  const collator = new Intl.Collator(["en-u-kn-" + kn]);
  assertEquals("en", collator.resolvedOptions().locale);
}

// A valid "kn" value sets the numeric option and is reflected in the resolved
// locale; the extra "-fo-obar" keyword appended to the request is not.
for (const [requested, expectedNumeric, expectedLocale] of valid_kn) {
  const collator = new Intl.Collator([requested + "-fo-obar"]);
  assertEquals(expectedNumeric, collator.resolvedOptions().numeric);
  assertEquals(expectedLocale, collator.resolvedOptions().locale);
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// "ca" values that are expected to be dropped from the resolved locale.
const invalid_ca = ["invalid", "abce"];

// "ca" values that are expected to survive in the resolved locale.
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
const valid_ca = [
  "buddhist", "chinese", "coptic", "dangi", "ethioaa", "ethiopic",
  "gregory", "hebrew", "indian", "islamic", "islamic-umalqura",
  "islamic-tbla", "islamic-civil", "islamic-rgsa", "iso8601", "japanese",
  "persian", "roc",
];

// Base locales each valid value is combined with.
const locales = ["en", "ar"];

// An invalid "ca" value must resolve to the bare "en" locale.
for (const ca of invalid_ca) {
  const formatter = new Intl.DateTimeFormat(["en-u-ca-" + ca + "-fo-obar"]);
  assertEquals("en", formatter.resolvedOptions().locale);
}

// A valid "ca" value is kept; the extra "-fo-obar" keyword appended to the
// request is expected to be absent from the resolved locale.
for (const ca of valid_ca) {
  for (const base of locales) {
    const expected = base + "-u-ca-" + ca;
    const formatter = new Intl.DateTimeFormat([expected + "-fo-obar"]);
    assertEquals(expected, formatter.resolvedOptions().locale);
  }
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// "nu" values that are expected to be dropped from the resolved locale.
const invalid_nu = ["invalid", "abce", "finance", "native", "traditio"];

// "nu" values that are expected to survive in the resolved locale.
// https://tc39.github.io/ecma402/#table-numbering-system-digits
const valid_nu = [
  "arab", "arabext", "bali", "beng", "deva", "fullwide", "gujr", "guru",
  "hanidec", "khmr", "knda", "laoo", "latn", "limb", "mlym", "mong", "mymr",
  "orya", "tamldec", "telu", "thai", "tibt",
];

// Base locales each valid value is combined with.
const locales = ["en", "ar"];

// An invalid "nu" value must resolve to the bare "en" locale.
for (const nu of invalid_nu) {
  const formatter = new Intl.DateTimeFormat(["en-u-nu-" + nu + "-fo-obar"]);
  assertEquals("en", formatter.resolvedOptions().locale);
}

// A valid "nu" value is kept; the extra "-fo-obar" keyword appended to the
// request is expected to be absent from the resolved locale.
for (const nu of valid_nu) {
  for (const base of locales) {
    const expected = base + "-u-nu-" + nu;
    const formatter = new Intl.DateTimeFormat([expected + "-fo-obar"]);
    assertEquals(expected, formatter.resolvedOptions().locale);
  }
}
...@@ -29,6 +29,8 @@ ...@@ -29,6 +29,8 @@
[ALWAYS, { [ALWAYS, {
# TODO(jochen): The following test is flaky. # TODO(jochen): The following test is flaky.
'overrides/caching': [PASS, FAIL], 'overrides/caching': [PASS, FAIL],
'date-format/check-hc-option': [FAIL],
}], # ALWAYS }], # ALWAYS
['variant == no_wasm_traps', { ['variant == no_wasm_traps', {
......
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Type values that this test expects to be rejected for the "nu"
// (numbering system) key.
const invalid_nu = ["invalid", "abce", "finance", "native", "traditio"];

// Numbering systems this test expects to be accepted.
// https://tc39.github.io/ecma402/#table-numbering-system-digits
const valid_nu = [
  "arab",
  "arabext",
  "bali",
  "beng",
  "deva",
  "fullwide",
  "gujr",
  "guru",
  "hanidec",
  "khmr",
  "knda",
  "laoo",
  "latn",
  "limb",
  "mlym",
  "mong",
  "mymr",
  "orya",
  "tamldec",
  "telu",
  "thai",
  "tibt",
];

// Base languages combined with every valid numbering system.
const locales = ["en", "ar"];

// An invalid numbering system (and the "-fo-obar" keyword) is expected to
// be stripped from the locale resolved by Intl.NumberFormat.
for (const nu of invalid_nu) {
  const formatter = new Intl.NumberFormat(["en-u-nu-" + nu + "-fo-obar"]);
  assertEquals("en", formatter.resolvedOptions().locale);
}

// A valid numbering system is expected to be kept in the resolved locale,
// while the trailing "-fo-obar" keyword is not.
for (const nu of valid_nu) {
  for (const base of locales) {
    const expected = base + "-u-nu-" + nu;
    const formatter = new Intl.NumberFormat([expected + "-fo-obar"]);
    assertEquals(expected, formatter.resolvedOptions().locale);
  }
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Tags carrying a valid hour cycle: the resolved locale is expected to be
// identical to the requested tag.
const tagsWithValidHourCycle = [
  "en-u-hc-h11-nu-arab",
  "en-u-hc-h12-nu-arab",
  "en-u-hc-h23-nu-arab",
  "en-u-hc-h24-nu-arab",
];
for (const tag of tagsWithValidHourCycle) {
  assertEquals(tag, new Intl.DateTimeFormat([tag]).resolvedOptions().locale);
}

// https://tc39.github.io/ecma402/#sec-intl.datetimeformat-internal-slots
// invalid hc should be removed
// [[LocaleData]][locale].hc must be « null, "h11", "h12", "h23", "h24" » for all locale values.
const tagsWithInvalidHourCycle = [
  "en-u-hc-h10-nu-arab",
  "en-u-hc-h13-nu-arab",
  "en-u-hc-h22-nu-arab",
  "en-u-hc-h25-nu-arab",
];
for (const tag of tagsWithInvalidHourCycle) {
  // Only the "hc" keyword is expected to be dropped; "nu-arab" stays.
  assertEquals(
    "en-u-nu-arab",
    new Intl.DateTimeFormat([tag]).resolvedOptions().locale
  );
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Type values that this test expects to be rejected for the "lb" key.
const invalid_lb = [
  "invalid",
  "abce",
  "breakall",
  "keepall",
  "none",
  "standard",
];

// Type values that this test expects to be accepted for the "lb" key.
const valid_lb = ["strict", "normal", "loose"];

// Base languages combined with every valid "lb" value.
const locales = ["en", "ja", "zh"];

// An invalid "lb" value (and the "-fo-obar" keyword) is expected to be
// stripped from the locale resolved by Intl.Segmenter.
for (const lb of invalid_lb) {
  const segmenter = new Intl.Segmenter(["en-u-lb-" + lb + "-fo-obar"]);
  assertEquals("en", segmenter.resolvedOptions().locale);
}

// A valid "lb" value is expected to be kept in the resolved locale, while
// the trailing "-fo-obar" keyword is not.
for (const lb of valid_lb) {
  for (const base of locales) {
    const expected = base + "-u-lb-" + lb;
    const segmenter = new Intl.Segmenter([expected + "-fo-obar"]);
    assertEquals(expected, segmenter.resolvedOptions().locale);
  }
}
...@@ -4,7 +4,10 @@ ...@@ -4,7 +4,10 @@
// Environment Variables: LC_ALL=pt-BR.UTF8 // Environment Variables: LC_ALL=pt-BR.UTF8
// The data files packaged with d8 currently have Brazilian Portuguese
// DateTimeFormat but not Collation
if (this.Intl) { if (this.Intl) {
assertEquals('pt-BR', Intl.Collator().resolvedOptions().locale); assertEquals('pt', Intl.Collator().resolvedOptions().locale);
assertEquals('pt-BR', Intl.DateTimeFormat().resolvedOptions().locale); assertEquals('pt-BR', Intl.DateTimeFormat().resolvedOptions().locale);
} }
...@@ -73,6 +73,9 @@ ...@@ -73,6 +73,9 @@
'intl402/DateTimeFormat/prototype/format/format-function-name': [FAIL], 'intl402/DateTimeFormat/prototype/format/format-function-name': [FAIL],
'intl402/Collator/prototype/compare/compare-function-name': [FAIL], 'intl402/Collator/prototype/compare/compare-function-name': [FAIL],
'intl402/DateTimeFormat/prototype/resolvedOptions/hourCycle': [FAIL],
# https://code.google.com/p/v8/issues/detail?id=4251 # https://code.google.com/p/v8/issues/detail?id=4251
'language/expressions/postfix-increment/S11.3.1_A5_T1': [FAIL], 'language/expressions/postfix-increment/S11.3.1_A5_T1': [FAIL],
'language/expressions/postfix-increment/S11.3.1_A5_T2': [FAIL], 'language/expressions/postfix-increment/S11.3.1_A5_T2': [FAIL],
...@@ -568,6 +571,10 @@ ...@@ -568,6 +571,10 @@
'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL], 'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL],
'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL], 'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7481
'intl402/NumberFormat/ignore-invalid-unicode-ext-values': [FAIL],
'intl402/DateTimeFormat/ignore-invalid-unicode-ext-values': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7482 # https://bugs.chromium.org/p/v8/issues/detail?id=7482
'intl402/DateTimeFormat/prototype/resolvedOptions/resolved-locale-with-hc-unicode': [FAIL], 'intl402/DateTimeFormat/prototype/resolvedOptions/resolved-locale-with-hc-unicode': [FAIL],
...@@ -577,8 +584,12 @@ ...@@ -577,8 +584,12 @@
# https://bugs.chromium.org/p/v8/issues/detail?id=7669 # https://bugs.chromium.org/p/v8/issues/detail?id=7669
'intl402/Intl/getCanonicalLocales/canonicalized-tags': [FAIL], 'intl402/Intl/getCanonicalLocales/canonicalized-tags': [FAIL],
# https://github.com/tc39/ecma402/issues/223 # Tests assume that the sort order of "same elements" (comparator returns 0)
'intl402/Collator/missing-unicode-ext-value-defaults-to-true': [FAIL], # is deterministic.
# https://crbug.com/v8/7808
'intl402/String/prototype/localeCompare/returns-same-results-as-Collator': [SKIP],
'intl402/Collator/prototype/compare/bound-to-collator-instance': [SKIP],
'intl402/Collator/ignore-invalid-unicode-ext-values': [SKIP],
# https://bugs.chromium.org/p/v8/issues/detail?id=8260 # https://bugs.chromium.org/p/v8/issues/detail?id=8260
'intl402/Locale/constructor-non-iana-canon': [FAIL], 'intl402/Locale/constructor-non-iana-canon': [FAIL],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment