Commit 919270e0 authored by Jungshik Shin's avatar Jungshik Shin Committed by Commit Bot

Fix the fast path for locale canonicalization

Not all 2 or 3 letter language codes are canonical. Some of them need
to be canonicalized.

Specifically, exclude {jw,ji,iw,in} and all three-letter codes from the
fast path except for 'fil'.

{jw,ji,iw,in} are deprecated ISO 639 codes for
{Javanese, Yiddish, Hebrew, Indonesian}. They should be
canonicalized to {jv,yi,he,id}. So, do not return early
in the fast path, but pass it down to the full canonicalization.

In addition, there are 70+ deprecated 3-letter codes that need to be
replaced by their modern equivalents. Instead of checking and replacing
in v8, just pass them to ICU to handle.

Along with the following ICU change, two more tests will pass.

  https://chromium-review.googlesource.com/c/chromium/deps/icu/+/1026797

These two tests still fail because of the disagreement between ICU and the test
expectations about 5 grandfathered tags with no preferred value (e.g.
i-default, zh-min, cel-gaulish).

  'intl402/Intl/getCanonicalLocales/canonicalized-tags'
  'intl402/Intl/getCanonicalLocales/preferred-grandfathered'

Bug: v8:5693, v8:7669
Test: test262/intl402/language-tags-canonicalized.js
Test: test262/intl402/Intl/preferred-variants.js
Test: intl/general/language_tags_with_preferred_values.js
Cq-Include-Trybots: luci.v8.try:v8_linux_noi18n_rel_ng
Change-Id: Ide7e9c90ac046859604c7b71c641f84ce9c64be5
Reviewed-on: https://chromium-review.googlesource.com/1023379
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Jungshik Shin <jshin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#52823}
parent e3c9f266
...@@ -782,15 +782,19 @@ function canonicalizeLanguageTag(localeID) { ...@@ -782,15 +782,19 @@ function canonicalizeLanguageTag(localeID) {
throw %make_type_error(kLanguageID); throw %make_type_error(kLanguageID);
} }
// Optimize for the most common case; a language code alone in
// the canonical form/lowercase (e.g. "en", "fil").
if (IS_STRING(localeID) &&
!IS_NULL(%regexp_internal_match(/^[a-z]{2,3}$/, localeID))) {
return localeID;
}
var localeString = TO_STRING(localeID); var localeString = TO_STRING(localeID);
// Optimize for the most common case; a 2-letter language code in the
// canonical form/lowercase that is not one of deprecated codes
// (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language
// codes. Instead, let them be handled by ICU in the slow path. Besides,
// fast-track 'fil' (3-letter canonical code).
if ((!IS_NULL(%regexp_internal_match(/^[a-z]{2}$/, localeString)) &&
IS_NULL(%regexp_internal_match(/^(in|iw|ji|jw)$/, localeString))) ||
localeString === "fil") {
return localeString;
}
if (isStructuallyValidLanguageTag(localeString) === false) { if (isStructuallyValidLanguageTag(localeString) === false) {
throw %make_range_error(kInvalidLanguageTag, localeString); throw %make_range_error(kInvalidLanguageTag, localeString);
} }
......
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Each entry is [input tag, expected canonical tag]. The inputs carry a
// deprecated or redundant subtag; Intl.getCanonicalLocales must return exactly
// one locale, and it must equal the expected canonical form.
[
  // Redundant tag with preferred value.
  ["sgn-de", "gsg"],
  ["sgn-de-u-co-phonebk", "gsg-u-co-phonebk"],
  // Deprecated region subtags.
  ["und-Latn-dd", "und-Latn-DE"],
  ["und-dd-u-co-phonebk", "und-DE-u-co-phonebk"],
  ["de-dd-u-co-phonebk", "de-DE-u-co-phonebk"],
  ["de-latn-dd-u-co-phonebk", "de-Latn-DE-u-co-phonebk"],
  ["fr-ZR", "fr-CD"],
  // Deprecated 2- and 3-letter language subtags.
  ["in", "id"],
  ["in-latn", "id-Latn"],
  ["in-latn-id", "id-Latn-ID"],
  ["in-latn-id-u-ca-gregory", "id-Latn-ID-u-ca-gregory"],
  ["jw", "jv"],
  ["aam", "aas"],
  ["aam-u-ca-gregory", "aas-u-ca-gregory"],
].forEach(function ([tag, expected]) {
  const canonicalLocales = Intl.getCanonicalLocales(tag);
  // mjsunit's assertEquals takes (expected, found, name_opt): keep the
  // expectation first so a failure reports the values the right way round,
  // and pass the input tag as the name so the failing entry is identifiable.
  assertEquals(1, canonicalLocales.length, tag);
  assertEquals(expected, canonicalLocales[0], tag);
});
...@@ -438,9 +438,6 @@ ...@@ -438,9 +438,6 @@
# https://bugs.chromium.org/p/v8/issues/detail?id=7472 # https://bugs.chromium.org/p/v8/issues/detail?id=7472
'intl402/NumberFormat/currency-digits': [FAIL], 'intl402/NumberFormat/currency-digits': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7473
'intl402/language-tags-canonicalized': [SKIP],
# https://bugs.chromium.org/p/v8/issues/detail?id=7474 # https://bugs.chromium.org/p/v8/issues/detail?id=7474
'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL], 'intl402/NumberFormat/prototype/format/format-fraction-digits': [FAIL],
'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL], 'intl402/NumberFormat/prototype/format/format-significant-digits': [FAIL],
...@@ -462,10 +459,9 @@ ...@@ -462,10 +459,9 @@
# https://bugs.chromium.org/p/v8/issues/detail?id=7483 # https://bugs.chromium.org/p/v8/issues/detail?id=7483
'annexB/built-ins/Function/createdynfn-html-close-comment-params': [FAIL], 'annexB/built-ins/Function/createdynfn-html-close-comment-params': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=5012 # https://bugs.chromium.org/p/v8/issues/detail?id=7669
'intl402/Intl/getCanonicalLocales/canonicalized-tags': [FAIL], 'intl402/Intl/getCanonicalLocales/canonicalized-tags': [FAIL],
'intl402/Intl/getCanonicalLocales/preferred-grandfathered': [FAIL], 'intl402/Intl/getCanonicalLocales/preferred-grandfathered': [FAIL],
'intl402/Intl/getCanonicalLocales/preferred-variant': [SKIP],
# https://bugs.chromium.org/p/v8/issues/detail?id=7513 # https://bugs.chromium.org/p/v8/issues/detail?id=7513
'built-ins/TypedArrays/ctors/buffer-arg/buffer-arg-bufferbyteoffset-throws-from-modulo-element-size': [FAIL], 'built-ins/TypedArrays/ctors/buffer-arg/buffer-arg-bufferbyteoffset-throws-from-modulo-element-size': [FAIL],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment