Commit 4e18190e authored by jshin's avatar jshin Committed by Commit bot

Timezone name check fix

1. Location names with more than one underscore (e.g. Ho_Chi_Minh)
   didn't work because of the way capturing works with repeated patterns
   in RE. They are now supported by changing the RE to capture the whole
   string and splitting on '_' in the next step.

2. Adds support for location names with a hyphen

3. Adds support for timezone ids with three parts (e.g.
   America/Argentina/Buenos_Aires)

4. Adds special handling of 'au', 'es' and 'of' in zone ids. They need to be kept in lowercase. (see the full list at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones )

5. Adds regression tests for all the above and makes the existing tests
   more robust against future ICU changes. ICU currently canonicalizes some
   zone names to deprecated names, but that may change. (
   http://bugs.icu-project.org/trac/ticket/12044 )

BUG=364374
LOG=Y

Review URL: https://codereview.chromium.org/1529363005

Cr-Commit-Position: refs/heads/master@{#33097}
parent af95a4dd
...@@ -176,12 +176,25 @@ var TIMEZONE_NAME_CHECK_RE = UNDEFINED; ...@@ -176,12 +176,25 @@ var TIMEZONE_NAME_CHECK_RE = UNDEFINED;
/**
 * Returns the regular expression used to validate a whole time zone ID of
 * the form Area/Location(/Location)*, creating and caching it on first use.
 *
 * Capture groups: [1] the area, [2] the first location, [3] every further
 * '/Location' part as one string (undefined when there is none).
 *
 * The trailing group is optional ('?') rather than repeated ('*'): the inner
 * '+' already consumes every remaining part, and wrapping it in '*' made the
 * match backtrack exponentially on long invalid inputs such as
 * 'A/b/c/c/c/.../c1'.
 */
function GetTimezoneNameCheckRE() {
  if (IS_UNDEFINED(TIMEZONE_NAME_CHECK_RE)) {
    TIMEZONE_NAME_CHECK_RE = new GlobalRegExp(
        '^([A-Za-z]+)/([A-Za-z_-]+)((?:\/[A-Za-z_-]+)+)?$');
  }
  return TIMEZONE_NAME_CHECK_RE;
}
/**
 * Matches valid location parts of IANA time zone names: one or more runs of
 * ASCII letters joined by single '_' or '-' separators.
 *
 * Capture groups: [1] the first word, [2] the rest of the string including
 * its leading separator (undefined when the location is a single word).
 *
 * The regular expression is created lazily and cached. The trailing group is
 * optional ('?') rather than repeated ('*'): the inner '+' already consumes
 * every remaining word, and wrapping it in '*' made failed matches backtrack
 * exponentially in the number of words.
 */
var TIMEZONE_NAME_LOCATION_PART_RE = UNDEFINED;

function GetTimezoneNameLocationPartRE() {
  if (IS_UNDEFINED(TIMEZONE_NAME_LOCATION_PART_RE)) {
    TIMEZONE_NAME_LOCATION_PART_RE =
        new GlobalRegExp('^([A-Za-z]+)((?:[_-][A-Za-z]+)+)?$');
  }
  return TIMEZONE_NAME_LOCATION_PART_RE;
}
/** /**
* Adds bound method to the prototype of the given object. * Adds bound method to the prototype of the given object.
*/ */
...@@ -678,6 +691,34 @@ function toTitleCaseWord(word) { ...@@ -678,6 +691,34 @@ function toTitleCaseWord(word) {
%StringToLowerCase(%_Call(StringSubstr, word, 1)); %StringToLowerCase(%_Call(StringSubstr, word, 1));
} }
/**
* Returns titlecased location, bueNos_airES -> Buenos_Aires
* or ho_cHi_minH -> Ho_Chi_Minh. It is locale-agnostic and only
* deals with ASCII only characters.
* 'of', 'au' and 'es' are special-cased and lowercased.
*/
function toTitleCaseTimezoneLocation(location) {
var match = %_Call(StringMatch, location, GetTimezoneNameLocationPartRE());
if (IS_NULL(match)) throw MakeRangeError(kExpectedLocation, location);
var result = toTitleCaseWord(match[1]);
if (!IS_UNDEFINED(match[2]) && 2 < match.length) {
// The first character is a separator, '_' or '-'.
// None of IANA zone names has both '_' and '-'.
var separator = %_Call(StringSubstring, match[2], 0, 1);
var parts = %_Call(StringSplit, match[2], separator);
for (var i = 1; i < parts.length; i++) {
var part = parts[i]
var lowercasedPart = %StringToLowerCase(part);
result = result + separator +
((lowercasedPart !== 'es' &&
lowercasedPart !== 'of' && lowercasedPart !== 'au') ?
toTitleCaseWord(part) : lowercasedPart);
}
}
return result;
}
/** /**
* Canonicalizes the language tag, or throws in case the tag is invalid. * Canonicalizes the language tag, or throws in case the tag is invalid.
*/ */
...@@ -1735,8 +1776,8 @@ addBoundMethod(Intl.DateTimeFormat, 'v8Parse', parseDate, 1); ...@@ -1735,8 +1776,8 @@ addBoundMethod(Intl.DateTimeFormat, 'v8Parse', parseDate, 1);
/** /**
* Returns canonical Area/Location name, or throws an exception if the zone * Returns canonical Area/Location(/Location) name, or throws an exception
* name is invalid IANA name. * if the zone name is invalid IANA name.
*/ */
function canonicalizeTimeZoneID(tzID) { function canonicalizeTimeZoneID(tzID) {
// Skip undefined zones. // Skip undefined zones.
...@@ -1751,16 +1792,22 @@ function canonicalizeTimeZoneID(tzID) { ...@@ -1751,16 +1792,22 @@ function canonicalizeTimeZoneID(tzID) {
return 'UTC'; return 'UTC';
} }
// We expect only _ and / beside ASCII letters. // TODO(jshin): Add support for Etc/GMT[+-]([1-9]|1[0-2])
// All inputs should conform to Area/Location from now on.
// We expect only _, '-' and / beside ASCII letters.
// All inputs should conform to Area/Location(/Location)* from now on.
var match = %_Call(StringMatch, tzID, GetTimezoneNameCheckRE()); var match = %_Call(StringMatch, tzID, GetTimezoneNameCheckRE());
if (IS_NULL(match)) throw MakeRangeError(kExpectedLocation, tzID); if (IS_NULL(match)) throw MakeRangeError(kExpectedTimezoneID, tzID);
var result = toTitleCaseTimezoneLocation(match[1]) + '/' +
toTitleCaseTimezoneLocation(match[2]);
var result = toTitleCaseWord(match[1]) + '/' + toTitleCaseWord(match[2]); if (!IS_UNDEFINED(match[3]) && 3 < match.length) {
var i = 3; var locations = %_Call(StringSplit, match[3], '/');
while (!IS_UNDEFINED(match[i]) && i < match.length) { // The 1st element is empty. Starts with i=1.
result = result + '_' + toTitleCaseWord(match[i]); for (var i = 1; i < locations.length; i++) {
i++; result = result + '/' + toTitleCaseTimezoneLocation(locations[i]);
}
} }
return result; return result;
......
...@@ -329,7 +329,11 @@ class CallSite { ...@@ -329,7 +329,11 @@ class CallSite {
T(UnsupportedSuper, "Unsupported reference to 'super'") \ T(UnsupportedSuper, "Unsupported reference to 'super'") \
/* RangeError */ \ /* RangeError */ \
T(DateRange, "Provided date is not in valid range.") \ T(DateRange, "Provided date is not in valid range.") \
T(ExpectedLocation, "Expected Area/Location for time zone, got %") \ T(ExpectedTimezoneID, \
"Expected Area/Location(/Location)* for time zone, got %") \
T(ExpectedLocation, \
"Expected letters optionally connected with underscores or hyphens for " \
"a location, got %") \
T(InvalidArrayBufferLength, "Invalid array buffer length") \ T(InvalidArrayBufferLength, "Invalid array buffer length") \
T(ArrayBufferAllocationFailed, "Array buffer allocation failed") \ T(ArrayBufferAllocationFailed, "Array buffer allocation failed") \
T(InvalidArrayLength, "Invalid array length") \ T(InvalidArrayLength, "Invalid array length") \
......
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
if (this.Intl) {
  // Returns the time zone ID that an 'en-US' Intl.DateTimeFormat resolves
  // the requested `timeZone` to. Using a helper avoids assigning to
  // undeclared (implicitly global) variables.
  const resolvedTimeZone = (timeZone) =>
      new Intl.DateTimeFormat('en-US', {'timeZone': timeZone})
          .resolvedOptions().timeZone;

  // Normalizes Kat{h,}mandu (chromium:487322)
  // Both spellings are expected to resolve to 'Asia/Katmandu'.
  assertEquals('Asia/Katmandu', resolvedTimeZone('Asia/Katmandu'));
  assertEquals('Asia/Katmandu', resolvedTimeZone('Asia/Kathmandu'));

  // Throws for unsupported time zones.
  assertThrows(
      () => Intl.DateTimeFormat(undefined, {timeZone: 'Aurope/Paris'}));
}
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
if (this.Intl) {
  // chromium:364374

  // Returns the time zone ID that an 'en-US' Intl.DateTimeFormat resolves
  // the requested `timeZone` to. Using a helper avoids assigning to
  // undeclared (implicitly global) variables.
  const resolvedTimeZone = (timeZone) =>
      new Intl.DateTimeFormat('en-US', {'timeZone': timeZone})
          .resolvedOptions().timeZone;

  // Asserts that constructing a formatter for `timeZone` throws.
  const assertInvalidTimeZone = (timeZone) =>
      assertThrows(
          () => Intl.DateTimeFormat(undefined, {timeZone: timeZone}));

  // Locations with 2 underscores are accepted and normalized.
  // 'of' and 'es' are always lowercased.
  assertEquals('Europe/Isle_of_Man', resolvedTimeZone('eUrope/isLe_OF_man'));
  assertEquals('Africa/Dar_es_Salaam',
               resolvedTimeZone('africa/Dar_eS_salaam'));
  assertEquals('America/Port_of_Spain',
               resolvedTimeZone('America/port_of_spain'));

  // Zone ids with more than 2 parts are accepted and normalized.
  assertEquals('America/North_Dakota/New_Salem',
               resolvedTimeZone('America/north_Dakota/new_salem'));

  // 3-part zone IDs are accepted and normalized.
  // Two Buenos Aires aliases are identical.
  const buenosAires = resolvedTimeZone('America/aRgentina/buenos_aIres');
  assertEquals(buenosAires,
               resolvedTimeZone('America/Argentina/Buenos_Aires'));
  assertEquals(buenosAires, resolvedTimeZone('America/Buenos_Aires'));

  const indianapolis = resolvedTimeZone('America/Indiana/Indianapolis');
  assertEquals(indianapolis, resolvedTimeZone('America/Indianapolis'));

  // TODO: ICU does not recognize East-Indiana. Once it does, also assert
  // that 'America/East-Indiana' resolves identically to
  // 'America/Indiana/Indianapolis'.
  // NOTE(review): the IANA alias may actually be spelled 'US/East-Indiana'
  // -- confirm before enabling.

  // Zone IDs with hyphens. 'au' has to be in lowercase.
  assertEquals('America/Port-au-Prince',
               resolvedTimeZone('America/port-aU-pRince'));

  // Accepts Ho_Chi_Minh and treats it as identical to Saigon
  assertEquals(resolvedTimeZone('Asia/Ho_Chi_Minh'),
               resolvedTimeZone('Asia/Saigon'));

  // Throws for invalid timezone ids.
  assertInvalidTimeZone('Europe/_Paris');
  assertInvalidTimeZone('America/New__York');
  assertInvalidTimeZone('America//New_York');
  assertInvalidTimeZone('America/New_York_');
  assertInvalidTimeZone('America/New_Y0rk');
}
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
if (this.Intl) {
  // Returns the time zone ID that an 'en-US' Intl.DateTimeFormat resolves
  // the requested `timeZone` to. Using a helper avoids assigning to
  // undeclared (implicitly global) variables.
  const resolvedTimeZone = (timeZone) =>
      new Intl.DateTimeFormat('en-US', {'timeZone': timeZone})
          .resolvedOptions().timeZone;

  // Normalizes Kat{h,}mandu (chromium:487322)
  // According to the IANA timezone db, Kathmandu is the current canonical
  // name, but ICU got it backward. To make this test robust against a future
  // ICU change ( http://bugs.icu-project.org/trac/ticket/12044 ),
  // just check that Kat(h)mandu is resolved identically.
  assertEquals(resolvedTimeZone('Asia/Katmandu'),
               resolvedTimeZone('Asia/Kathmandu'));

  // Normalizes Ulan_Bator to Ulaanbaatar. Unlike Kat(h)mandu, ICU got this
  // right so that we make sure that Ulan_Bator is resolved to Ulaanbaatar.
  assertEquals('Asia/Ulaanbaatar', resolvedTimeZone('Asia/Ulaanbaatar'));
  assertEquals('Asia/Ulaanbaatar', resolvedTimeZone('Asia/Ulan_Bator'));

  // Throws for unsupported time zones.
  assertThrows(
      () => Intl.DateTimeFormat(undefined, {timeZone: 'Aurope/Paris'}));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment