Commit 6bff5d79 authored by Frank Tang, committed by Commit Bot

[Intl] Reland part of "Validate u extension type"

Fix intl402/*/ignore-invalid-unicode-ext-values
intl/*/check-*

Bug: v8:7481
Change-Id: I5c9d7e19c010953ff9503a2e0981fa148278a451
Reviewed-on: https://chromium-review.googlesource.com/c/1396739
Commit-Queue: Frank Tang <ftang@chromium.org>
Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org>
Cr-Commit-Position: refs/heads/master@{#58606}
parent 64c85cf4
......@@ -26,6 +26,7 @@
#include "src/string-case.h"
#include "unicode/basictz.h"
#include "unicode/brkiter.h"
#include "unicode/calendar.h"
#include "unicode/coll.h"
#include "unicode/decimfmt.h"
#include "unicode/locid.h"
......@@ -776,7 +777,7 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
Nothing<std::string>());
}
return maybe_to_language_tag;
return Intl::ToLanguageTag(icu_locale);
}
Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
......@@ -1443,6 +1444,44 @@ MaybeHandle<JSObject> Intl::SupportedLocalesOf(
}
namespace {
// Returns true when |value| matches one of the keyword values that
// T::getKeywordValuesForLocale() enumerates for |key| on the base-name form
// of |locale| (i.e. a locale rebuilt from locale.getBaseName()).
//
// T must expose a static getKeywordValuesForLocale(); the callers in this
// file instantiate it with icu::Calendar and icu::Collator.
//
// |value| is converted with uloc_toLegacyType() first, and the enumerated
// items are compared against that legacy form.
template <typename T>
bool IsValidExtension(const icu::Locale& locale, const char* key,
                      const std::string& value) {
  // uloc_toLegacyType() is documented to return NULL when the value cannot be
  // mapped to a well-formed legacy type. Constructing a std::string from a
  // null pointer is undefined behavior, so reject such values up front.
  const char* legacy_type_cstr = uloc_toLegacyType(key, value.c_str());
  if (legacy_type_cstr == nullptr) return false;
  const std::string legacy_type(legacy_type_cstr);

  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::StringEnumeration> enumeration(
      T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
                                   false, status));
  // Never dereference the enumeration if ICU reported a failure or handed
  // back nothing.
  if (U_FAILURE(status) || enumeration == nullptr) return false;

  int32_t length;
  for (const char* item = enumeration->next(&length, status); item != nullptr;
       item = enumeration->next(&length, status)) {
    if (U_SUCCESS(status) && legacy_type == item) {
      return true;
    }
  }
  return false;
}
// Reports whether |value| is an acceptable calendar keyword value for
// |locale|, by delegating to IsValidExtension with the "calendar" key.
bool IsValidCalendar(const icu::Locale& locale, const std::string& value) {
  const bool is_known_calendar =
      IsValidExtension<icu::Calendar>(locale, "calendar", value);
  return is_known_calendar;
}
// Reports whether |value| is an acceptable collation keyword value for
// |locale|. "standard" and "search" are always rejected before ICU is
// consulted; everything else is checked through IsValidExtension with the
// "collation" key.
bool IsValidCollation(const icu::Locale& locale, const std::string& value) {
  if (value == "standard" || value == "search") {
    return false;
  }
  return IsValidExtension<icu::Collator>(locale, "collation", value);
}
bool IsValidNumberingSystem(const std::string& value) {
std::set<std::string> invalid_values = {"native", "traditio", "finance"};
if (invalid_values.find(value) != invalid_values.end()) return false;
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::NumberingSystem> numbering_system(
icu::NumberingSystem::createInstanceByName(value.c_str(), status));
return U_SUCCESS(status) && numbering_system.get() != nullptr;
}
std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
......@@ -1482,13 +1521,40 @@ std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
extensions.insert(
std::pair<std::string, std::string>(bcp47_key, bcp47_value));
} else {
status = U_ZERO_ERROR;
icu_locale->setKeywordValue(keyword, nullptr, status);
CHECK(U_SUCCESS(status));
bool is_valid_value = false;
// 8.h.ii.1.a If keyLocaleData contains requestedValue, then
if (strcmp("ca", bcp47_key) == 0) {
is_valid_value = IsValidCalendar(*icu_locale, bcp47_value);
} else if (strcmp("co", bcp47_key) == 0) {
is_valid_value = IsValidCollation(*icu_locale, bcp47_value);
} else if (strcmp("hc", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("lb", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
std::set<std::string> valid_values = {"strict", "normal", "loose"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("kn", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
std::set<std::string> valid_values = {"true", "false"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("kf", bcp47_key) == 0) {
// https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
std::set<std::string> valid_values = {"upper", "lower", "false"};
is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
} else if (strcmp("nu", bcp47_key) == 0) {
is_valid_value = IsValidNumberingSystem(bcp47_value);
}
if (is_valid_value) {
extensions.insert(
std::pair<std::string, std::string>(bcp47_key, bcp47_value));
continue;
}
}
status = U_ZERO_ERROR;
icu_locale->setKeywordValue(keyword, nullptr, status);
CHECK(U_SUCCESS(status));
}
return extensions;
......
......@@ -60,13 +60,6 @@ void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
.FromJust());
}
// Writes the BCP 47 language tag for |locale| into |tag| using
// uloc_toLanguageTag() in non-strict mode.
//
// |tag| is passed to ICU with a capacity of ULOC_FULLNAME_CAPACITY, so the
// caller's buffer must be at least that large. Aborts via CHECK if ICU
// reports a failure or if the result could not be NUL-terminated.
void toLanguageTag(const icu::Locale& locale, char* tag) {
  UErrorCode status = U_ZERO_ERROR;
  uloc_toLanguageTag(locale.getName(), tag, ULOC_FULLNAME_CAPACITY, FALSE,
                     &status);
  CHECK(U_SUCCESS(status));
  // U_SUCCESS() is also true for warnings. When the tag exactly fills the
  // buffer ICU reports U_STRING_NOT_TERMINATED_WARNING and leaves |tag|
  // without a terminating NUL, which callers treating it as a C string
  // would read past.
  CHECK(status != U_STRING_NOT_TERMINATED_WARNING);
}
} // anonymous namespace
// static
......@@ -144,13 +137,13 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
const char* legacy_collation_key = uloc_toLegacyKey(collation_key);
DCHECK_NOT_NULL(legacy_collation_key);
char bcp47_locale_tag[ULOC_FULLNAME_CAPACITY];
char legacy_collation_value[ULOC_FULLNAME_CAPACITY];
status = U_ZERO_ERROR;
int32_t length =
icu_locale.getKeywordValue(legacy_collation_key, legacy_collation_value,
ULOC_FULLNAME_CAPACITY, status);
std::string locale;
if (length > 0 && U_SUCCESS(status)) {
const char* collation_value =
uloc_toUnicodeLocaleType(collation_key, legacy_collation_value);
......@@ -176,13 +169,13 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
new_icu_locale.setKeywordValue(legacy_collation_key, nullptr, status);
CHECK(U_SUCCESS(status));
toLanguageTag(new_icu_locale, bcp47_locale_tag);
locale = Intl::ToLanguageTag(new_icu_locale).FromJust();
} else {
collation = collation_value;
toLanguageTag(icu_locale, bcp47_locale_tag);
locale = Intl::ToLanguageTag(icu_locale).FromJust();
}
} else {
toLanguageTag(icu_locale, bcp47_locale_tag);
locale = Intl::ToLanguageTag(icu_locale).FromJust();
}
// 5. For each row of Table 2, except the header row, in table order, do
......@@ -197,7 +190,7 @@ Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
// [[Numeric]] "numeric" kn
// [[CaseFirst]] "caseFirst" kf
CreateDataPropertyForOptions(
isolate, options, isolate->factory()->locale_string(), bcp47_locale_tag);
isolate, options, isolate->factory()->locale_string(), locale.c_str());
CreateDataPropertyForOptions(isolate, options,
isolate->factory()->usage_string(), usage);
CreateDataPropertyForOptions(
......@@ -235,6 +228,14 @@ UColAttributeValue ToUColAttributeValue(Intl::CaseFirst case_first) {
}
}
// Turns numeric collation (UCOL_NUMERIC_COLLATION) on or off on
// |icu_collator| according to |numeric|. |icu_collator| must be non-null;
// aborts via CHECK if ICU rejects the attribute change.
void SetNumericOption(icu::Collator* icu_collator, bool numeric) {
  CHECK_NOT_NULL(icu_collator);
  const UColAttributeValue numeric_collation = numeric ? UCOL_ON : UCOL_OFF;
  UErrorCode error_code = U_ZERO_ERROR;
  icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, numeric_collation,
                             error_code);
  CHECK(U_SUCCESS(error_code));
}
void SetCaseFirstOption(icu::Collator* icu_collator,
Intl::CaseFirst case_first) {
CHECK_NOT_NULL(icu_collator);
......@@ -328,29 +329,7 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus());
std::map<std::string, std::string> extensions = r.extensions;
// 19. Let collation be r.[[co]].
//
// r.[[co]] is already set as part of the icu::Locale creation as
// icu parses unicode extensions and sets the keywords.
//
// We need to sanitize the keywords based on certain ECMAScript rules.
//
// As per https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots:
// The values "standard" and "search" must not be used as elements
// in any [[SortLocaleData]][locale].co and
// [[SearchLocaleData]][locale].co list.
auto co_extension_it = extensions.find("co");
if (co_extension_it != extensions.end()) {
const std::string& value = co_extension_it->second;
if ((value == "search") || (value == "standard")) {
UErrorCode status = U_ZERO_ERROR;
const char* key = uloc_toLegacyKey("co");
icu_locale.setKeywordValue(key, nullptr, status);
CHECK(U_SUCCESS(status));
}
}
// 5. Set collator.[[Usage]] to usage.
//
......@@ -410,19 +389,11 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
// passed in through the unicode extensions.
status = U_ZERO_ERROR;
if (found_numeric.FromJust()) {
icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
numeric ? UCOL_ON : UCOL_OFF, status);
CHECK(U_SUCCESS(status));
SetNumericOption(icu_collator.get(), numeric);
} else {
auto kn_extension_it = extensions.find("kn");
if (kn_extension_it != extensions.end()) {
const std::string& value = kn_extension_it->second;
numeric = (value == "true");
icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
numeric ? UCOL_ON : UCOL_OFF, status);
CHECK(U_SUCCESS(status));
auto kn_extension_it = r.extensions.find("kn");
if (kn_extension_it != r.extensions.end()) {
SetNumericOption(icu_collator.get(), (kn_extension_it->second == "true"));
}
}
......@@ -435,10 +406,10 @@ MaybeHandle<JSCollator> JSCollator::Initialize(Isolate* isolate,
if (case_first != Intl::CaseFirst::kUndefined) {
SetCaseFirstOption(icu_collator.get(), case_first);
} else {
auto kf_extension_it = extensions.find("kf");
if (kf_extension_it != extensions.end()) {
const std::string& value = kf_extension_it->second;
SetCaseFirstOption(icu_collator.get(), ToCaseFirst(value.c_str()));
auto kf_extension_it = r.extensions.find("kf");
if (kf_extension_it != r.extensions.end()) {
SetCaseFirstOption(icu_collator.get(),
ToCaseFirst(kf_extension_it->second.c_str()));
}
}
......
......@@ -865,10 +865,7 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize(
// ecma402/#sec-intl.datetimeformat-internal-slots
// The value of the [[RelevantExtensionKeys]] internal slot is
// « "ca", "nu", "hc" ».
//
// TODO(ftang): Add "hc" to this list of keys:
// https://bugs.chromium.org/p/v8/issues/detail?id=7482
std::set<std::string> relevant_extension_keys = {"nu", "ca"};
std::set<std::string> relevant_extension_keys = {"nu", "ca", "hc"};
// 10. Let localeData be %DateTimeFormat%.[[LocaleData]].
// 11. Let r be ResolveLocale( %DateTimeFormat%.[[AvailableLocales]],
......@@ -879,8 +876,6 @@ MaybeHandle<JSDateTimeFormat> JSDateTimeFormat::Initialize(
isolate, JSDateTimeFormat::GetAvailableLocales(), requested_locales,
locale_matcher, relevant_extension_keys);
// TODO(ftang): Make sure that "nu" key doesn't have "native",
// "traditio" or "finance" values.
icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus());
......
......@@ -263,26 +263,6 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize(
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
number_format->set_locale(*locale_str);
icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus());
std::map<std::string, std::string> extensions = r.extensions;
// The list that is the value of the "nu" field of any locale field of
// [[LocaleData]] must not include the values "native", "traditio", or
// "finance".
//
// See https://tc39.github.io/ecma402/#sec-intl.numberformat-internal-slots
if (extensions.find("nu") != extensions.end()) {
const std::string value = extensions.at("nu");
if (value == "native" || value == "traditio" || value == "finance") {
// 10. Set numberFormat.[[NumberingSystem]] to r.[[nu]].
UErrorCode status = U_ZERO_ERROR;
icu_locale.setKeywordValue("nu", nullptr, status);
CHECK(U_SUCCESS(status));
}
}
// 11. Let dataLocale be r.[[dataLocale]].
//
// 12. Let style be ? GetOption(options, "style", "string", « "decimal",
......@@ -356,20 +336,20 @@ MaybeHandle<JSNumberFormat> JSNumberFormat::Initialize(
std::unique_ptr<icu::NumberFormat> icu_number_format;
if (style == Style::DECIMAL) {
icu_number_format.reset(
icu::NumberFormat::createInstance(icu_locale, status));
icu::NumberFormat::createInstance(r.icu_locale, status));
} else if (style == Style::PERCENT) {
icu_number_format.reset(
icu::NumberFormat::createPercentInstance(icu_locale, status));
icu::NumberFormat::createPercentInstance(r.icu_locale, status));
} else {
DCHECK_EQ(style, Style::CURRENCY);
icu_number_format.reset(
icu::NumberFormat::createInstance(icu_locale, format_style, status));
icu::NumberFormat::createInstance(r.icu_locale, format_style, status));
}
if (U_FAILURE(status) || icu_number_format.get() == nullptr) {
status = U_ZERO_ERROR;
// Remove extensions and try again.
icu::Locale no_extension_locale(icu_locale.getBaseName());
icu::Locale no_extension_locale(r.icu_locale.getBaseName());
icu_number_format.reset(
icu::NumberFormat::createInstance(no_extension_locale, status));
......
......@@ -151,12 +151,6 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize(
Intl::ResolveLocale(isolate, JSPluralRules::GetAvailableLocales(),
requested_locales, matcher, {});
// 18. Set collator.[[Locale]] to r.[[locale]].
icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus());
std::map<std::string, std::string> extensions = r.extensions;
// 12. Set pluralRules.[[Locale]] to the value of r.[[locale]].
Handle<String> locale_str =
isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
......@@ -164,7 +158,7 @@ MaybeHandle<JSPluralRules> JSPluralRules::Initialize(
std::unique_ptr<icu::PluralRules> icu_plural_rules;
std::unique_ptr<icu::DecimalFormat> icu_decimal_format;
InitializeICUPluralRules(isolate, icu_locale, type, &icu_plural_rules,
InitializeICUPluralRules(isolate, r.icu_locale, type, &icu_plural_rules,
&icu_decimal_format);
CHECK_NOT_NULL(icu_plural_rules.get());
CHECK_NOT_NULL(icu_decimal_format.get());
......
......@@ -78,7 +78,7 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
// requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
Intl::ResolvedLocale r =
Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
requested_locales, matcher, {});
requested_locales, matcher, {"lb"});
// 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", «
// "strict", "normal", "loose" », "normal").
......
......@@ -31,14 +31,8 @@
'overrides/caching': [PASS, FAIL],
# https://code.google.com/p/v8/issues/detail?id=7481
'regress-7481': [FAIL],
'collator/check-kf-option': [FAIL],
'collator/check-kn-option': [FAIL],
'date-format/check-ca-option': [FAIL],
'date-format/check-hc-option': [FAIL],
'date-format/check-nu-option': [FAIL],
'number-format/check-nu-option': [FAIL],
'segmenter/check-lb-option': [FAIL],
}], # ALWAYS
['variant == no_wasm_traps', {
......
......@@ -73,9 +73,6 @@
'intl402/DateTimeFormat/prototype/format/format-function-name': [FAIL],
'intl402/Collator/prototype/compare/compare-function-name': [FAIL],
'intl402/DateTimeFormat/prototype/resolvedOptions/hourCycle': [FAIL],
# https://code.google.com/p/v8/issues/detail?id=4251
'language/expressions/postfix-increment/S11.3.1_A5_T1': [FAIL],
'language/expressions/postfix-increment/S11.3.1_A5_T2': [FAIL],
......@@ -539,10 +536,6 @@
'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL],
'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7481
'intl402/NumberFormat/ignore-invalid-unicode-ext-values': [FAIL],
'intl402/DateTimeFormat/ignore-invalid-unicode-ext-values': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7482
'intl402/DateTimeFormat/prototype/resolvedOptions/resolved-locale-with-hc-unicode': [FAIL],
......@@ -557,7 +550,6 @@
# https://crbug.com/v8/7808
'intl402/String/prototype/localeCompare/returns-same-results-as-Collator': [SKIP],
'intl402/Collator/prototype/compare/bound-to-collator-instance': [SKIP],
'intl402/Collator/ignore-invalid-unicode-ext-values': [SKIP],
# https://bugs.chromium.org/p/v8/issues/detail?id=8260
'intl402/Locale/constructor-non-iana-canon': [FAIL],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment