Commit ff9ce1ab authored by lrn@chromium.org's avatar lrn@chromium.org

Make date parser handle all ES5 Date Time Strings correctly.

This means that ES5 Date Time Strings will default to UTC if timezone is absent.
Handle as many legacy strings as possible the same way as before

BUG=v8:1498
TEST=mjsunit/date

Review URL: http://codereview.chromium.org/7291022

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8513 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent f2a9026d
...@@ -981,11 +981,22 @@ function PadInt(n, digits) { ...@@ -981,11 +981,22 @@ function PadInt(n, digits) {
function DateToISOString() { function DateToISOString() {
var t = DATE_VALUE(this); var t = DATE_VALUE(this);
if (NUMBER_IS_NAN(t)) return kInvalidDate; if (NUMBER_IS_NAN(t)) return kInvalidDate;
return this.getUTCFullYear() + var year = this.getUTCFullYear();
var year_string;
if (year >= 0 && year <= 9999) {
year_string = PadInt(year, 4);
} else {
if (year < 0) {
year_string = "-" + PadInt(-year, 6);
} else {
year_string = "+" + PadInt(year, 6);
}
}
return year_string +
'-' + PadInt(this.getUTCMonth() + 1, 2) + '-' + PadInt(this.getUTCMonth() + 1, 2) +
'-' + PadInt(this.getUTCDate(), 2) + '-' + PadInt(this.getUTCDate(), 2) +
'T' + PadInt(this.getUTCHours(), 2) + 'T' + PadInt(this.getUTCHours(), 2) +
':' + PadInt(this.getUTCMinutes(), 2) + ':' + PadInt(this.getUTCMinutes(), 2) +
':' + PadInt(this.getUTCSeconds(), 2) + ':' + PadInt(this.getUTCSeconds(), 2) +
'.' + PadInt(this.getUTCMilliseconds(), 3) + '.' + PadInt(this.getUTCMilliseconds(), 3) +
'Z'; 'Z';
...@@ -995,8 +1006,8 @@ function DateToISOString() { ...@@ -995,8 +1006,8 @@ function DateToISOString() {
function DateToJSON(key) { function DateToJSON(key) {
var o = ToObject(this); var o = ToObject(this);
var tv = DefaultNumber(o); var tv = DefaultNumber(o);
if (IS_NUMBER(tv) && !NUMBER_IS_FINITE(tv)) { if (IS_NUMBER(tv) && !NUMBER_IS_FINITE(tv)) {
return null; return null;
} }
return o.toISOString(); return o.toISOString();
} }
......
...@@ -39,16 +39,71 @@ bool DateParser::Parse(Vector<Char> str, ...@@ -39,16 +39,71 @@ bool DateParser::Parse(Vector<Char> str,
UnicodeCache* unicode_cache) { UnicodeCache* unicode_cache) {
ASSERT(out->length() >= OUTPUT_SIZE); ASSERT(out->length() >= OUTPUT_SIZE);
InputReader<Char> in(unicode_cache, str); InputReader<Char> in(unicode_cache, str);
DateStringTokenizer<Char> scanner(&in);
TimeZoneComposer tz; TimeZoneComposer tz;
TimeComposer time; TimeComposer time;
DayComposer day; DayComposer day;
while (!in.IsEnd()) { // Specification:
if (in.IsAsciiDigit()) { // Accept ES5 ISO 8601 date-time-strings or legacy dates compatible
// Parse a number (possibly with 1 or 2 trailing colons). // with Safari.
int n = in.ReadUnsignedNumber(); // ES5 ISO 8601 dates:
if (in.Skip(':')) { // [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]]
if (in.Skip(':')) { // where yyyy is in the range 0000..9999 and
// +/-yyyyyy is in the range -999999..+999999 -
// but -000000 is invalid (year zero must be positive),
// MM is in the range 01..12,
// DD is in the range 01..31,
// MM and DD default to 01 if missing,
// HH is generally in the range 00..23, but can be 24 if mm, ss
// and sss are zero (or missing), representing midnight at the
// end of a day,
// mm and ss are in the range 00..59,
// sss is in the range 000..999,
// hh is in the range 00..23,
// mm, ss, and sss default to 00 if missing, and
// timezone defaults to Z if missing.
// Extensions:
// We also allow sss to have more or less than three digits (but at
// least one).
// We allow hh:mm to be specified as hhmm.
// Legacy dates:
// Any unrecognized word before the first number is ignored.
// Parenthesized text is ignored.
// An unsigned number followed by ':' is a time value, and is
// added to the TimeComposer. A number followed by '::' adds a second
// zero as well. A number followed by '.' is also a time and must be
// followed by milliseconds.
// Any other number is a date component and is added to DayComposer.
// A month name (or really: any word having the same first three letters
// as a month name) is recorded as a named month in the Day composer.
// A word recognizable as a time-zone is recorded as such, as is
// '(+|-)(hhmm|hh:)'.
// Legacy dates don't allow extra signs ('+' or '-') or unmatched ')'
// after a number has been read (before the first number, any garbage
// is allowed).
// Intersection of the two:
// A string that matches both formats (e.g. 1970-01-01) will be
// parsed as an ES5 date-time string - which means it will default
// to UTC time-zone. That's unavoidable if following the ES5
// specification.
// After a valid "T" has been read while scanning an ES5 datetime string,
// the input can no longer be a valid legacy date, since the "T" is a
// garbage string after a number has been read.
// First try getting as far as possible with an ES5 Date Time String.
DateToken next_unhandled_token = ParseES5DateTime(&scanner, &day, &time, &tz);
if (next_unhandled_token.IsInvalid()) return false;
bool has_read_number = !day.IsEmpty();
// If there's anything left, continue with the legacy parser.
for (DateToken token = next_unhandled_token;
!token.IsEndOfInput();
token = scanner.Next()) {
if (token.IsNumber()) {
has_read_number = true;
int n = token.number();
if (scanner.SkipSymbol(':')) {
if (scanner.SkipSymbol(':')) {
// n + "::" // n + "::"
if (!time.IsEmpty()) return false; if (!time.IsEmpty()) return false;
time.Add(n); time.Add(n);
...@@ -56,12 +111,13 @@ bool DateParser::Parse(Vector<Char> str, ...@@ -56,12 +111,13 @@ bool DateParser::Parse(Vector<Char> str,
} else { } else {
// n + ":" // n + ":"
if (!time.Add(n)) return false; if (!time.Add(n)) return false;
in.Skip('.'); if (scanner.Peek().IsSymbol('.')) scanner.Next();
} }
} else if (in.Skip('.') && time.IsExpecting(n)) { } else if (scanner.SkipSymbol('.') && time.IsExpecting(n)) {
time.Add(n); time.Add(n);
if (!in.IsAsciiDigit()) return false; if (!scanner.Peek().IsNumber()) return false;
int n = in.ReadMilliseconds(); int n = ReadMilliseconds(scanner.Next());
if (n < 0) return false;
time.AddFinal(n); time.AddFinal(n);
} else if (tz.IsExpecting(n)) { } else if (tz.IsExpecting(n)) {
tz.SetAbsoluteMinute(n); tz.SetAbsoluteMinute(n);
...@@ -69,59 +125,206 @@ bool DateParser::Parse(Vector<Char> str, ...@@ -69,59 +125,206 @@ bool DateParser::Parse(Vector<Char> str,
time.AddFinal(n); time.AddFinal(n);
// Require end, white space, "Z", "+" or "-" immediately after // Require end, white space, "Z", "+" or "-" immediately after
// finalizing time. // finalizing time.
if (!in.IsEnd() && !in.SkipWhiteSpace() && !in.Is('Z') && DateToken peek = scanner.Peek();
!in.IsAsciiSign()) return false; if (!peek.IsEndOfInput() &&
!peek.IsWhiteSpace() &&
!peek.IsKeywordZ() &&
!peek.IsAsciiSign()) return false;
} else { } else {
if (!day.Add(n)) return false; if (!day.Add(n)) return false;
in.Skip('-'); // Ignore suffix '-' for year, month, or day. scanner.SkipSymbol('-');
// Skip trailing 'T' for ECMAScript 5 date string format but make
// sure that it is followed by a digit (for the time).
if (in.Skip('T') && !in.IsAsciiDigit()) return false;
} }
} else if (in.IsAsciiAlphaOrAbove()) { } else if (token.IsKeyword()) {
// Parse a "word" (sequence of chars. >= 'A'). // Parse a "word" (sequence of chars. >= 'A').
uint32_t pre[KeywordTable::kPrefixLength]; KeywordType type = token.keyword_type();
int len = in.ReadWord(pre, KeywordTable::kPrefixLength); int value = token.keyword_value();
int index = KeywordTable::Lookup(pre, len);
KeywordType type = KeywordTable::GetType(index);
if (type == AM_PM && !time.IsEmpty()) { if (type == AM_PM && !time.IsEmpty()) {
time.SetHourOffset(KeywordTable::GetValue(index)); time.SetHourOffset(value);
} else if (type == MONTH_NAME) { } else if (type == MONTH_NAME) {
day.SetNamedMonth(KeywordTable::GetValue(index)); day.SetNamedMonth(value);
in.Skip('-'); // Ignore suffix '-' for month names scanner.SkipSymbol('-');
} else if (type == TIME_ZONE_NAME && in.HasReadNumber()) { } else if (type == TIME_ZONE_NAME && has_read_number) {
tz.Set(KeywordTable::GetValue(index)); tz.Set(value);
} else { } else {
// Garbage words are illegal if a number has been read. // Garbage words are illegal if a number has been read.
if (in.HasReadNumber()) return false; if (has_read_number) return false;
} }
} else if (in.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) { } else if (token.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
// Parse UTC offset (only after UTC or time). // Parse UTC offset (only after UTC or time).
tz.SetSign(in.GetAsciiSignValue()); tz.SetSign(token.ascii_sign());
in.Next(); // The following number may be empty.
int n = in.ReadUnsignedNumber(); int n = 0;
if (in.Skip(':')) { if (scanner.Peek().IsNumber()) {
n = scanner.Next().number();
}
has_read_number = true;
if (scanner.Peek().IsSymbol(':')) {
tz.SetAbsoluteHour(n); tz.SetAbsoluteHour(n);
tz.SetAbsoluteMinute(kNone); tz.SetAbsoluteMinute(kNone);
} else { } else {
tz.SetAbsoluteHour(n / 100); tz.SetAbsoluteHour(n / 100);
tz.SetAbsoluteMinute(n % 100); tz.SetAbsoluteMinute(n % 100);
} }
} else if (in.Is('(')) { } else if ((token.IsAsciiSign() || token.IsSymbol(')')) &&
// Ignore anything from '(' to a matching ')' or end of string. has_read_number) {
in.SkipParentheses();
} else if ((in.IsAsciiSign() || in.Is(')')) && in.HasReadNumber()) {
// Extra sign or ')' is illegal if a number has been read. // Extra sign or ')' is illegal if a number has been read.
return false; return false;
} else { } else {
// Ignore other characters. // Ignore other characters and whitespace.
in.Next();
} }
} }
return day.Write(out) && time.Write(out) && tz.Write(out); return day.Write(out) && time.Write(out) && tz.Write(out);
} }
// Reads one token from the underlying InputReader and returns it.
//
// Classification is attempted in a fixed order: end of input, unsigned
// numeral, one of the single-character symbols ':', '-', '+', '.', ')',
// a word (looked up in KeywordTable), a run of white space, and finally
// anything else, which is reported as an Unknown token.
template<typename CharType>
DateParser::DateToken DateParser::DateStringTokenizer<CharType>::Scan() {
  // Remember where the token starts so its length can be derived from the
  // reader position after it has been consumed.
  const int start = in_->position();
  if (in_->IsEnd()) return DateToken::EndOfInput();

  if (in_->IsAsciiDigit()) {
    int value = in_->ReadUnsignedNumeral();
    return DateToken::Number(value, in_->position() - start);
  }

  // Single-character symbols, tried in the same order as always.
  const char kSymbols[] = ":-+.)";
  for (const char* candidate = kSymbols; *candidate != '\0'; candidate++) {
    if (in_->Skip(*candidate)) return DateToken::Symbol(*candidate);
  }

  if (in_->IsAsciiAlphaOrAbove()) {
    ASSERT(KeywordTable::kPrefixLength == 3);
    uint32_t prefix[3] = {0, 0, 0};
    // The token length is whatever ReadWord reports for the word.
    int word_length = in_->ReadWord(prefix, 3);
    int entry = KeywordTable::Lookup(prefix, word_length);
    return DateToken::Keyword(KeywordTable::GetType(entry),
                              KeywordTable::GetValue(entry),
                              word_length);
  }

  if (in_->SkipWhiteSpace()) {
    return DateToken::WhiteSpace(in_->position() - start);
  }

  // Either a parenthesized section was skipped as a whole, or a single
  // unclassified character is consumed; both yield the same Unknown token.
  if (!in_->SkipParentheses()) in_->Next();
  return DateToken::Unknown();
}
// Tries to consume an ES5 Date Time String from |scanner|, filling |day|,
// |time| and |tz| (all of which must be empty on entry).
//
// Returns:
//  - DateToken::EndOfInput() when the whole input was a valid ES5 string;
//    the day is then marked as an ISO date and the time zone defaults to UTC.
//  - DateToken::Invalid() when the input is definitely malformed. This is
//    only returned once the 'T' separator has been consumed.
//  - Any other token when the date part stops matching the ES5 grammar: the
//    token is the first one this function did not handle, and the caller
//    continues with it (and the remaining scanner input) in the legacy
//    parser. Components already added to |day| stay there.
template <typename Char>
DateParser::DateToken DateParser::ParseES5DateTime(
DateStringTokenizer<Char>* scanner,
DayComposer* day,
TimeComposer* time,
TimeZoneComposer* tz) {
ASSERT(day->IsEmpty());
ASSERT(time->IsEmpty());
ASSERT(tz->IsEmpty());
// Parse mandatory date string: [('-'|'+')yy]yyyy['-'MM['-'DD]]
if (scanner->Peek().IsAsciiSign()) {
// Keep the sign token, so we can pass it back to the legacy
// parser if we don't use it.
DateToken sign_token = scanner->Next();
// A signed year must have exactly six digits.
if (!scanner->Peek().IsFixedLengthNumber(6)) return sign_token;
int sign = sign_token.ascii_sign();
int year = scanner->Next().number();
// "-000000" is not a valid year. NOTE(review): the year token has already
// been consumed at this point, so the legacy parser resumes after it.
if (sign < 0 && year == 0) return sign_token;
day->Add(sign * year);
} else if (scanner->Peek().IsFixedLengthNumber(4)) {
// Unsigned year: exactly four digits.
day->Add(scanner->Next().number());
} else {
// Does not start like an ES5 date; hand the first token to the caller.
return scanner->Next();
}
// Optional "-MM" and "-DD" parts. Each must be a two-digit number in range;
// otherwise the offending token is handed to the legacy parser.
if (scanner->SkipSymbol('-')) {
if (!scanner->Peek().IsFixedLengthNumber(2) ||
!DayComposer::IsMonth(scanner->Peek().number())) return scanner->Next();
day->Add(scanner->Next().number());
if (scanner->SkipSymbol('-')) {
if (!scanner->Peek().IsFixedLengthNumber(2) ||
!DayComposer::IsDay(scanner->Peek().number())) return scanner->Next();
day->Add(scanner->Next().number());
}
}
// Check for optional time string:
// 'T'HH':'mm[':'ss['.'sss]] followed by an optional time zone designation.
if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) {
// No time part. Anything other than end of input goes to the legacy
// parser; end of input falls through to the success path below.
if (!scanner->Peek().IsEndOfInput()) return scanner->Next();
} else {
// ES5 Date Time String time part is present.
// From here on every mismatch is a hard failure (Invalid token).
scanner->Next();
if (!scanner->Peek().IsFixedLengthNumber(2) ||
!Between(scanner->Peek().number(), 0, 24)) {
return DateToken::Invalid();
}
// Allow 24:00[:00[.000]], but no other time starting with 24.
bool hour_is_24 = (scanner->Peek().number() == 24);
time->Add(scanner->Next().number());
// Minutes are mandatory once hours are present.
if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
if (!scanner->Peek().IsFixedLengthNumber(2) ||
!TimeComposer::IsMinute(scanner->Peek().number()) ||
(hour_is_24 && scanner->Peek().number() > 0)) {
return DateToken::Invalid();
}
time->Add(scanner->Next().number());
// Optional seconds, and optional fraction after the seconds.
if (scanner->SkipSymbol(':')) {
if (!scanner->Peek().IsFixedLengthNumber(2) ||
!TimeComposer::IsSecond(scanner->Peek().number()) ||
(hour_is_24 && scanner->Peek().number() > 0)) {
return DateToken::Invalid();
}
time->Add(scanner->Next().number());
if (scanner->SkipSymbol('.')) {
if (!scanner->Peek().IsNumber() ||
(hour_is_24 && scanner->Peek().number() > 0)) {
return DateToken::Invalid();
}
// Allow more or less than the mandated three digits.
time->Add(ReadMilliseconds(scanner->Next()));
}
}
// Check for optional timezone designation: 'Z' | ('+'|'-')hh':'mm
if (scanner->Peek().IsKeywordZ()) {
scanner->Next();
tz->Set(0);
} else if (scanner->Peek().IsSymbol('+') ||
scanner->Peek().IsSymbol('-')) {
tz->SetSign(scanner->Next().symbol() == '+' ? 1 : -1);
if (scanner->Peek().IsFixedLengthNumber(4)) {
// hhmm extension syntax.
// The four digits are split into hours (first two) and minutes (last two).
int hourmin = scanner->Next().number();
int hour = hourmin / 100;
int min = hourmin % 100;
if (!TimeComposer::IsHour(hour) || !TimeComposer::IsMinute(min)) {
return DateToken::Invalid();
}
tz->SetAbsoluteHour(hour);
tz->SetAbsoluteMinute(min);
} else {
// hh:mm standard syntax.
if (!scanner->Peek().IsFixedLengthNumber(2) ||
!TimeComposer::IsHour(scanner->Peek().number())) {
return DateToken::Invalid();
}
tz->SetAbsoluteHour(scanner->Next().number());
if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
if (!scanner->Peek().IsFixedLengthNumber(2) ||
!TimeComposer::IsMinute(scanner->Peek().number())) {
return DateToken::Invalid();
}
tz->SetAbsoluteMinute(scanner->Next().number());
}
}
// Nothing may follow the time (and optional time zone).
if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid();
}
// Successfully parsed ES5 Date Time String. Default to UTC if no TZ given.
if (tz->IsEmpty()) tz->Set(0);
// Mark the date so that its components are read in year-month-day order.
day->set_iso_date();
return DateToken::EndOfInput();
}
} } // namespace v8::internal } } // namespace v8::internal
#endif // V8_DATEPARSER_INL_H_ #endif // V8_DATEPARSER_INL_H_
// Copyright 2008 the V8 project authors. All rights reserved. // Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without // Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are // modification, are permitted provided that the following conditions are
// met: // met:
...@@ -44,7 +44,7 @@ bool DateParser::DayComposer::Write(FixedArray* output) { ...@@ -44,7 +44,7 @@ bool DateParser::DayComposer::Write(FixedArray* output) {
int day = kNone; int day = kNone;
if (named_month_ == kNone) { if (named_month_ == kNone) {
if (index_ == 3 && !IsDay(comp_[0])) { if (is_iso_date_ || (index_ == 3 && !IsDay(comp_[0]))) {
// YMD // YMD
year = comp_[0]; year = comp_[0];
month = comp_[1]; month = comp_[1];
...@@ -71,8 +71,10 @@ bool DateParser::DayComposer::Write(FixedArray* output) { ...@@ -71,8 +71,10 @@ bool DateParser::DayComposer::Write(FixedArray* output) {
} }
} }
if (Between(year, 0, 49)) year += 2000; if (!is_iso_date_) {
else if (Between(year, 50, 99)) year += 1900; if (Between(year, 0, 49)) year += 2000;
else if (Between(year, 50, 99)) year += 1900;
}
if (!Smi::IsValid(year) || !IsMonth(month) || !IsDay(day)) return false; if (!Smi::IsValid(year) || !IsMonth(month) || !IsDay(day)) return false;
...@@ -151,6 +153,7 @@ const int8_t DateParser::KeywordTable:: ...@@ -151,6 +153,7 @@ const int8_t DateParser::KeywordTable::
{'m', 's', 't', DateParser::TIME_ZONE_NAME, -7}, {'m', 's', 't', DateParser::TIME_ZONE_NAME, -7},
{'p', 'd', 't', DateParser::TIME_ZONE_NAME, -7}, {'p', 'd', 't', DateParser::TIME_ZONE_NAME, -7},
{'p', 's', 't', DateParser::TIME_ZONE_NAME, -8}, {'p', 's', 't', DateParser::TIME_ZONE_NAME, -8},
{'t', '\0', '\0', DateParser::TIME_SEPARATOR, 0},
{'\0', '\0', '\0', DateParser::INVALID, 0}, {'\0', '\0', '\0', DateParser::INVALID, 0},
}; };
...@@ -175,4 +178,35 @@ int DateParser::KeywordTable::Lookup(const uint32_t* pre, int len) { ...@@ -175,4 +178,35 @@ int DateParser::KeywordTable::Lookup(const uint32_t* pre, int len) {
} }
// Converts a numeral token that followed a '.' into milliseconds.
//
// The token carries the numeric value together with the number of digits
// that were written, which is what reveals leading zeros. The result is the
// value of the first three written digits, padded on the right with zeros
// when fewer than three digits were given.
int DateParser::ReadMilliseconds(DateToken token) {
  int value = token.number();
  int digits = token.length();

  // One or two digits: shift the leading digit into the hundreds position.
  switch (digits) {
    case 1:
      return value * 100;
    case 2:
      return value * 10;
    default:
      break;
  }
  // Exactly three digits (or a length outside 1..2) is used unchanged.
  if (digits <= 3) return value;

  // More than three digits. The value was built from at most
  // kMaxSignificantDigits digits, so only that many count when scaling down.
  if (digits > kMaxSignificantDigits) digits = kMaxSignificantDigits;
  int divisor = 1;
  do {
    ASSERT(divisor <= 100000000);  // divisor won't overflow.
    divisor *= 10;
  } while (--digits > 3);
  return value / divisor;
}
} } // namespace v8::internal } } // namespace v8::internal
...@@ -61,9 +61,14 @@ class DateParser : public AllStatic { ...@@ -61,9 +61,14 @@ class DateParser : public AllStatic {
static inline bool Between(int x, int lo, int hi) { static inline bool Between(int x, int lo, int hi) {
return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo); return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
} }
// Indicates a missing value. // Indicates a missing value.
static const int kNone = kMaxInt; static const int kNone = kMaxInt;
// Maximal number of digits used to build the value of a numeral.
// Remaining digits are ignored.
static const int kMaxSignificantDigits = 9;
// InputReader provides basic string parsing and character classification. // InputReader provides basic string parsing and character classification.
template <typename Char> template <typename Char>
class InputReader BASE_EMBEDDED { class InputReader BASE_EMBEDDED {
...@@ -71,32 +76,28 @@ class DateParser : public AllStatic { ...@@ -71,32 +76,28 @@ class DateParser : public AllStatic {
InputReader(UnicodeCache* unicode_cache, Vector<Char> s) InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
: index_(0), : index_(0),
buffer_(s), buffer_(s),
has_read_number_(false),
unicode_cache_(unicode_cache) { unicode_cache_(unicode_cache) {
Next(); Next();
} }
int position() { return index_; }
// Advance to the next character of the string. // Advance to the next character of the string.
void Next() { ch_ = (index_ < buffer_.length()) ? buffer_[index_++] : 0; } void Next() {
ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
// Read a string of digits as an unsigned number (cap just below kMaxInt). index_++;
int ReadUnsignedNumber() {
has_read_number_ = true;
int n;
for (n = 0; IsAsciiDigit() && n < kMaxInt / 10 - 1; Next()) {
n = n * 10 + ch_ - '0';
}
return n;
} }
// Read a string of digits, take the first three or fewer as an unsigned // Read a string of digits as an unsigned number. Cap value at
// number of milliseconds, and ignore any digits after the first three. // kMaxSignificantDigits, but skip remaining digits if the numeral
int ReadMilliseconds() { // is longer.
has_read_number_ = true; int ReadUnsignedNumeral() {
int n = 0; int n = 0;
int power; int i = 0;
for (power = 100; IsAsciiDigit(); Next(), power = power / 10) { while (IsAsciiDigit()) {
n = n + power * (ch_ - '0'); if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
i++;
Next();
} }
return n; return n;
} }
...@@ -151,18 +152,138 @@ class DateParser : public AllStatic { ...@@ -151,18 +152,138 @@ class DateParser : public AllStatic {
// Return 1 for '+' and -1 for '-'. // Return 1 for '+' and -1 for '-'.
int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
// Indicates whether any (possibly empty!) numbers have been read.
bool HasReadNumber() const { return has_read_number_; }
private: private:
int index_; int index_;
Vector<Char> buffer_; Vector<Char> buffer_;
bool has_read_number_;
uint32_t ch_; uint32_t ch_;
UnicodeCache* unicode_cache_; UnicodeCache* unicode_cache_;
}; };
enum KeywordType { INVALID, MONTH_NAME, TIME_ZONE_NAME, AM_PM }; enum KeywordType {
INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
};
// A single lexical token of a date string, as produced by
// DateStringTokenizer.
//
// A token consists of a tag, the number of characters it was built from and
// an integer payload. Non-keyword tokens use a negative tag from TagType;
// keyword tokens store their KeywordType (which is non-negative) directly as
// the tag. The payload is the numeric value for numbers, the character for
// symbols, the keyword table value for keywords and -1 otherwise.
//
// All predicates and accessors are const so that they can also be used on
// const tokens and const references.
struct DateToken {
 public:
  // Tag predicates.
  bool IsInvalid() const { return tag_ == kInvalidTokenTag; }
  bool IsUnknown() const { return tag_ == kUnknownTokenTag; }
  bool IsNumber() const { return tag_ == kNumberTag; }
  bool IsSymbol() const { return tag_ == kSymbolTag; }
  bool IsWhiteSpace() const { return tag_ == kWhiteSpaceTag; }
  bool IsEndOfInput() const { return tag_ == kEndOfInputTag; }
  // Every non-negative tag is a KeywordType.
  bool IsKeyword() const { return tag_ >= kKeywordTagStart; }

  // Number of characters the token was built from.
  int length() const { return length_; }

  // Value of a number token.
  int number() const {
    ASSERT(IsNumber());
    return value_;
  }
  // Kind of a keyword token.
  KeywordType keyword_type() const {
    ASSERT(IsKeyword());
    return static_cast<KeywordType>(tag_);
  }
  // Keyword table value of a keyword token.
  int keyword_value() const {
    ASSERT(IsKeyword());
    return value_;
  }
  // Character of a symbol token.
  char symbol() const {
    ASSERT(IsSymbol());
    return static_cast<char>(value_);
  }

  // True for a symbol token holding exactly |symbol|.
  bool IsSymbol(char symbol) const {
    return IsSymbol() && this->symbol() == symbol;
  }
  // True for a keyword token of kind |tag|.
  bool IsKeywordType(KeywordType tag) const {
    return tag_ == tag;
  }
  // True for a number token written with exactly |length| characters.
  bool IsFixedLengthNumber(int length) const {
    return IsNumber() && length_ == length;
  }
  // True for the symbols '+' and '-'.
  bool IsAsciiSign() const {
    return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
  }
  // Returns 1 for '+' (code 43) and -1 for '-' (code 45).
  int ascii_sign() const {
    ASSERT(IsAsciiSign());
    return 44 - value_;
  }
  // True for a one-character time zone keyword with value 0
  // (presumably the "Z" designator; the keyword table decides).
  bool IsKeywordZ() const {
    return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
  }
  // True for an unknown token whose payload equals |character|.
  // NOTE(review): Unknown() always stores -1, so as written this can only
  // match -1.
  bool IsUnknown(int character) const {
    return IsUnknown() && value_ == character;
  }

  // Factory functions.
  static DateToken Keyword(KeywordType tag, int value, int length) {
    return DateToken(tag, length, value);
  }
  static DateToken Number(int value, int length) {
    return DateToken(kNumberTag, length, value);
  }
  static DateToken Symbol(char symbol) {
    return DateToken(kSymbolTag, 1, symbol);
  }
  static DateToken EndOfInput() {
    return DateToken(kEndOfInputTag, 0, -1);
  }
  static DateToken WhiteSpace(int length) {
    return DateToken(kWhiteSpaceTag, length, -1);
  }
  static DateToken Unknown() {
    return DateToken(kUnknownTokenTag, 1, -1);
  }
  static DateToken Invalid() {
    return DateToken(kInvalidTokenTag, 0, -1);
  }

 private:
  // Tags of non-keyword tokens. They are negative so that they cannot
  // collide with KeywordType values, which start at kKeywordTagStart.
  enum TagType {
    kInvalidTokenTag = -6,
    kUnknownTokenTag = -5,
    kWhiteSpaceTag = -4,
    kNumberTag = -3,
    kSymbolTag = -2,
    kEndOfInputTag = -1,
    kKeywordTagStart = 0
  };
  DateToken(int tag, int length, int value)
      : tag_(tag),
        length_(length),
        value_(value) { }

  int tag_;
  int length_;  // Number of characters.
  int value_;
};
// Turns the characters delivered by an InputReader into DateTokens, always
// keeping exactly one token of lookahead.
template <typename Char>
class DateStringTokenizer {
 public:
  // Scans the first token right away so that Peek() is valid immediately.
  // |in_| is declared before |next_|, so it is set before Scan() runs.
  explicit DateStringTokenizer(InputReader<Char>* in)
      : in_(in), next_(Scan()) { }

  // Returns the lookahead token and scans the one after it.
  DateToken Next() {
    DateToken current = next_;
    next_ = Scan();
    return current;
  }

  // Returns the lookahead token without consuming it.
  DateToken Peek() {
    return next_;
  }

  // Consumes the lookahead token if it is the symbol |symbol| and reports
  // whether it did so.
  bool SkipSymbol(char symbol) {
    if (!next_.IsSymbol(symbol)) return false;
    next_ = Scan();
    return true;
  }

 private:
  // Reads the next token from |in_|.
  DateToken Scan();

  InputReader<Char>* in_;
  DateToken next_;
};
static int ReadMilliseconds(DateToken number);
// KeywordTable maps names of months, time zones, am/pm to numbers. // KeywordTable maps names of months, time zones, am/pm to numbers.
class KeywordTable : public AllStatic { class KeywordTable : public AllStatic {
...@@ -201,6 +322,7 @@ class DateParser : public AllStatic { ...@@ -201,6 +322,7 @@ class DateParser : public AllStatic {
} }
bool IsUTC() const { return hour_ == 0 && minute_ == 0; } bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
bool Write(FixedArray* output); bool Write(FixedArray* output);
bool IsEmpty() { return hour_ == kNone; }
private: private:
int sign_; int sign_;
int hour_; int hour_;
...@@ -228,10 +350,10 @@ class DateParser : public AllStatic { ...@@ -228,10 +350,10 @@ class DateParser : public AllStatic {
bool Write(FixedArray* output); bool Write(FixedArray* output);
static bool IsMinute(int x) { return Between(x, 0, 59); } static bool IsMinute(int x) { return Between(x, 0, 59); }
private:
static bool IsHour(int x) { return Between(x, 0, 23); } static bool IsHour(int x) { return Between(x, 0, 23); }
static bool IsHour12(int x) { return Between(x, 0, 12); }
static bool IsSecond(int x) { return Between(x, 0, 59); } static bool IsSecond(int x) { return Between(x, 0, 59); }
private:
static bool IsHour12(int x) { return Between(x, 0, 12); }
static bool IsMillisecond(int x) { return Between(x, 0, 999); } static bool IsMillisecond(int x) { return Between(x, 0, 999); }
static const int kSize = 4; static const int kSize = 4;
...@@ -242,22 +364,42 @@ class DateParser : public AllStatic { ...@@ -242,22 +364,42 @@ class DateParser : public AllStatic {
class DayComposer BASE_EMBEDDED { class DayComposer BASE_EMBEDDED {
public: public:
DayComposer() : index_(0), named_month_(kNone) {} DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
bool IsEmpty() const { return index_ == 0; } bool IsEmpty() const { return index_ == 0; }
bool Add(int n) { bool Add(int n) {
return index_ < kSize ? (comp_[index_++] = n, true) : false; if (index_ < kSize) {
comp_[index_] = n;
index_++;
return true;
}
return false;
} }
void SetNamedMonth(int n) { named_month_ = n; } void SetNamedMonth(int n) { named_month_ = n; }
bool Write(FixedArray* output); bool Write(FixedArray* output);
private: void set_iso_date() { is_iso_date_ = true; }
static bool IsMonth(int x) { return Between(x, 1, 12); } static bool IsMonth(int x) { return Between(x, 1, 12); }
static bool IsDay(int x) { return Between(x, 1, 31); } static bool IsDay(int x) { return Between(x, 1, 31); }
private:
static const int kSize = 3; static const int kSize = 3;
int comp_[kSize]; int comp_[kSize];
int index_; int index_;
int named_month_; int named_month_;
// If set, ensures that data is always parsed in year-month-date order.
bool is_iso_date_;
}; };
// Tries to parse an ES5 Date Time String. Returns the next token
// to continue with in the legacy date string parser. If parsing is
// complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
// returns DateToken::Invalid(). Otherwise parsing continues in the
// legacy parser.
template <typename Char>
static DateParser::DateToken ParseES5DateTime(
DateStringTokenizer<Char>* scanner,
DayComposer* day,
TimeComposer* time,
TimeZoneComposer* tz);
}; };
......
...@@ -285,9 +285,9 @@ for (var i = 0; i < 24 * 365 * 100; i += 150) { ...@@ -285,9 +285,9 @@ for (var i = 0; i < 24 * 365 * 100; i += 150) {
// Negative tests. // Negative tests.
var testCasesNegative = [ var testCasesNegative = [
'May 25 2008 1:30 (PM)) UTC', 'May 25 2008 1:30 (PM)) UTC', // Bad unmatched ')' after number.
'May 25 2008 1:30( )AM (PM)', 'May 25 2008 1:30( )AM (PM)', //
'May 25 2008 AAA (GMT)']; 'May 25 2008 AAA (GMT)']; // Unknown word after number.
testCasesNegative.forEach(function (s) { testCasesNegative.forEach(function (s) {
assertTrue(isNaN(Date.parse(s)), s + " is not NaN."); assertTrue(isNaN(Date.parse(s)), s + " is not NaN.");
......
...@@ -187,3 +187,123 @@ d = new Date(1969, 12, 1, Infinity); ...@@ -187,3 +187,123 @@ d = new Date(1969, 12, 1, Infinity);
assertTrue(isNaN(d.getTime())); assertTrue(isNaN(d.getTime()));
d = new Date(1969, 12, 1, -Infinity); d = new Date(1969, 12, 1, -Infinity);
assertTrue(isNaN(d.getTime())); assertTrue(isNaN(d.getTime()));
// Parsing ES5 ISO-8601 dates.
// When TZ is omitted, it defaults to 'Z' meaning UTC.

// Check epoch.
// Every combination of year form, optional month/day part and optional
// time/zone part below spells the epoch, so each must parse to 0.
["1970", "+001970"].forEach(function (year) {
  ["-01-01", "-01", ""].forEach(function (monthAndDay) {
    ["T00:00:00.000+00:00",
     "T00:00:00.000-00:00",
     "T00:00:00.000Z",
     "T00:00:00.000",
     "T00:00:00",
     "T00:00",
     ""].forEach(function (timeAndZone) {
      assertEquals(0, Date.parse(year + monthAndDay + timeAndZone));
    });
  });
});
// Check random date.
// Each string is checked twice: with its four-digit year as written, and
// with "+00" prepended to turn the year into the expanded six-digit form.
// The expected time value is the same in both cases.
["", "+00"].forEach(function (yearPrefix) {
  [[70671003500, "1972-03-28T23:50:03.500+01:00"],
   [70674603500, "1972-03-28T23:50:03.500Z"],
   [70674603500, "1972-03-28T23:50:03.500"],
   [70674603000, "1972-03-28T23:50:03"],
   [70674600000, "1972-03-28T23:50"],
   [70588800000, "1972-03-28"],
   [68338203500, "1972-03T23:50:03.500+01:00"],
   [68341803500, "1972-03T23:50:03.500Z"],
   [68341803500, "1972-03T23:50:03.500"],
   [68341803000, "1972-03T23:50:03"],
   [68341800000, "1972-03T23:50"],
   [68256000000, "1972-03"],
   [63154203500, "1972T23:50:03.500+01:00"],
   [63157803500, "1972T23:50:03.500Z"],
   [63157803500, "1972T23:50:03.500"],
   [63157803000, "1972T23:50:03"],
   [63072000000, "1972"]].forEach(function (testCase) {
    assertEquals(testCase[0], Date.parse(yearPrefix + testCase[1]));
  });
});
// Ensure that ISO-years in the range 00-99 aren't translated to the range
// 1950..2049.
[[-60904915200000, "0040-01-01"],
 [-60273763200000, "0060-01-01"],
 [-62167219200000, "0000-01-01"],
 [-62167219200000, "+000000-01-01"]].forEach(function (testCase) {
  assertEquals(testCase[0], Date.parse(testCase[1]));
});

// Test negative years.
[[-63429523200000, "-000040-01-01"],
 [-64060675200000, "-000060-01-01"],
 [-124397510400000, "-001972-01-01"]].forEach(function (testCase) {
  assertEquals(testCase[0], Date.parse(testCase[1]));
});
// Check time-zones.
// Walk every quarter-hour offset from 00:00 up to 23:45 and check it on both
// sides of UTC against the same instant written with the 'Z' designator.
assertEquals(70674603500, Date.parse("1972-03-28T23:50:03.500Z"));
for (var i = 0; i < 24; i++) {
  // Two-digit hour field.
  var hh = String(i);
  if (i < 10) hh = "0" + hh;
  for (var j = 0; j < 60; j += 15) {
    // Two-digit minute field.
    var mm = String(j);
    if (j < 10) mm = "0" + mm;
    // Size of the offset in milliseconds.
    var ms = (i * 60 + j) * 60000;
    // A "-hh:mm" zone is behind UTC, so the same wall-clock time is a later
    // instant; a "+hh:mm" zone is ahead, so it is an earlier one.
    var string = "1972-03-28T23:50:03.500-" + hh + ":" + mm;
    assertEquals(70674603500 + ms, Date.parse(string), string);
    string = "1972-03-28T23:50:03.500+" + hh + ":" + mm;
    assertEquals(70674603500 - ms, Date.parse(string), string);
  }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment