dateparser-inl.h 12.9 KB
Newer Older
1
// Copyright 2011 the V8 project authors. All rights reserved.
2 3
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4

Yang Guo's avatar
Yang Guo committed
5 6
#ifndef V8_DATE_DATEPARSER_INL_H_
#define V8_DATE_DATEPARSER_INL_H_
7

Yang Guo's avatar
Yang Guo committed
8
#include "src/date/dateparser.h"
9
#include "src/execution/isolate.h"
10
#include "src/strings/char-predicates-inl.h"
11

12 13
namespace v8 {
namespace internal {
14 15

template <typename Char>
16
bool DateParser::Parse(Isolate* isolate, base::Vector<Char> str, double* out) {
17
  InputReader<Char> in(str);
18
  DateStringTokenizer<Char> scanner(&in);
19 20 21 22
  TimeZoneComposer tz;
  TimeComposer time;
  DayComposer day;

23
  // Specification:
24
  // Accept ES5 ISO 8601 date-time-strings or legacy dates compatible
25
  // with Safari.
26
  // ES5 ISO 8601 dates:
27 28 29 30 31 32 33 34 35 36 37 38 39 40
  //   [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]]
  //   where yyyy is in the range 0000..9999 and
  //         +/-yyyyyy is in the range -999999..+999999 -
  //           but -000000 is invalid (year zero must be positive),
  //         MM is in the range 01..12,
  //         DD is in the range 01..31,
  //         MM and DD defaults to 01 if missing,,
  //         HH is generally in the range 00..23, but can be 24 if mm, ss
  //           and sss are zero (or missing), representing midnight at the
  //           end of a day,
  //         mm and ss are in the range 00..59,
  //         sss is in the range 000..999,
  //         hh is in the range 00..23,
  //         mm, ss, and sss default to 00 if missing, and
41 42
  //         timezone defaults to Z if missing
  //           (following Safari, ISO actually demands local time).
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
  //  Extensions:
  //   We also allow sss to have more or less than three digits (but at
  //   least one).
  //   We allow hh:mm to be specified as hhmm.
  // Legacy dates:
  //  Any unrecognized word before the first number is ignored.
  //  Parenthesized text is ignored.
  //  An unsigned number followed by ':' is a time value, and is
  //  added to the TimeComposer. A number followed by '::' adds a second
  //  zero as well. A number followed by '.' is also a time and must be
  //  followed by milliseconds.
  //  Any other number is a date component and is added to DayComposer.
  //  A month name (or really: any word having the same first three letters
  //  as a month name) is recorded as a named month in the Day composer.
  //  A word recognizable as a time-zone is recorded as such, as is
  //  '(+|-)(hhmm|hh:)'.
  //  Legacy dates don't allow extra signs ('+' or '-') or umatched ')'
  //  after a number has been read (before the first number, any garbage
  //  is allowed).
  // Intersection of the two:
  //  A string that matches both formats (e.g. 1970-01-01) will be
64 65 66 67
  //  parsed as an ES5 date-time string - which means it will default
  //  to UTC time-zone. That's unavoidable if following the ES5
  //  specification.
  //  After a valid "T" has been read while scanning an ES5 datetime string,
68 69 70
  //  the input can no longer be a valid legacy date, since the "T" is a
  //  garbage string after a number has been read.

71 72
  // First try getting as far as possible with as ES5 Date Time String.
  DateToken next_unhandled_token = ParseES5DateTime(&scanner, &day, &time, &tz);
73 74 75
  if (next_unhandled_token.IsInvalid()) return false;
  bool has_read_number = !day.IsEmpty();
  // If there's anything left, continue with the legacy parser.
76
  bool legacy_parser = false;
Yang Guo's avatar
Yang Guo committed
77
  for (DateToken token = next_unhandled_token; !token.IsEndOfInput();
78 79
       token = scanner.Next()) {
    if (token.IsNumber()) {
80
      legacy_parser = true;
81 82 83 84
      has_read_number = true;
      int n = token.number();
      if (scanner.SkipSymbol(':')) {
        if (scanner.SkipSymbol(':')) {
85 86 87 88 89 90 91
          // n + "::"
          if (!time.IsEmpty()) return false;
          time.Add(n);
          time.Add(0);
        } else {
          // n + ":"
          if (!time.Add(n)) return false;
92
          if (scanner.Peek().IsSymbol('.')) scanner.Next();
93
        }
94
      } else if (scanner.SkipSymbol('.') && time.IsExpecting(n)) {
95
        time.Add(n);
96
        if (!scanner.Peek().IsNumber()) return false;
97 98 99
        int ms = ReadMilliseconds(scanner.Next());
        if (ms < 0) return false;
        time.AddFinal(ms);
100 101 102 103
      } else if (tz.IsExpecting(n)) {
        tz.SetAbsoluteMinute(n);
      } else if (time.IsExpecting(n)) {
        time.AddFinal(n);
104 105
        // Require end, white space, "Z", "+" or "-" immediately after
        // finalizing time.
106
        DateToken peek = scanner.Peek();
Yang Guo's avatar
Yang Guo committed
107 108 109
        if (!peek.IsEndOfInput() && !peek.IsWhiteSpace() &&
            !peek.IsKeywordZ() && !peek.IsAsciiSign())
          return false;
110 111
      } else {
        if (!day.Add(n)) return false;
112
        scanner.SkipSymbol('-');
113
      }
114
    } else if (token.IsKeyword()) {
115
      legacy_parser = true;
116
      // Parse a "word" (sequence of chars. >= 'A').
117 118
      KeywordType type = token.keyword_type();
      int value = token.keyword_value();
119
      if (type == AM_PM && !time.IsEmpty()) {
120
        time.SetHourOffset(value);
121
      } else if (type == MONTH_NAME) {
122 123 124 125
        day.SetNamedMonth(value);
        scanner.SkipSymbol('-');
      } else if (type == TIME_ZONE_NAME && has_read_number) {
        tz.Set(value);
126 127
      } else {
        // Garbage words are illegal if a number has been read.
128
        if (has_read_number) return false;
129 130 131
        // The first number has to be separated from garbage words by
        // whitespace or other separators.
        if (scanner.Peek().IsNumber()) return false;
132
      }
133
    } else if (token.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
134
      legacy_parser = true;
135
      // Parse UTC offset (only after UTC or time).
136 137 138
      tz.SetSign(token.ascii_sign());
      // The following number may be empty.
      int n = 0;
139
      int length = 0;
140
      if (scanner.Peek().IsNumber()) {
141 142 143
        DateToken next_token = scanner.Next();
        length = next_token.length();
        n = next_token.number();
144 145 146 147
      }
      has_read_number = true;

      if (scanner.Peek().IsSymbol(':')) {
148
        tz.SetAbsoluteHour(n);
149
        // TODO(littledan): Use minutes as part of timezone?
150
        tz.SetAbsoluteMinute(kNone);
151 152 153 154 155 156
      } else if (length == 2 || length == 1) {
        // Handle time zones like GMT-8
        tz.SetAbsoluteHour(n);
        tz.SetAbsoluteMinute(0);
      } else if (length == 4 || length == 3) {
        // Looks like the hhmm format
157 158
        tz.SetAbsoluteHour(n / 100);
        tz.SetAbsoluteMinute(n % 100);
159 160 161
      } else {
        // No need to accept time zones like GMT-12345
        return false;
162
      }
163 164
    } else if ((token.IsAsciiSign() || token.IsSymbol(')')) &&
               has_read_number) {
165 166 167
      // Extra sign or ')' is illegal if a number has been read.
      return false;
    } else {
168
      // Ignore other characters and whitespace.
169 170
    }
  }
171

172 173 174 175 176 177 178
  bool success = day.Write(out) && time.Write(out) && tz.Write(out);

  if (legacy_parser && success) {
    isolate->CountUsage(v8::Isolate::kLegacyDateParser);
  }

  return success;
179 180
}

Yang Guo's avatar
Yang Guo committed
181
template <typename CharType>
182 183 184 185 186 187 188 189 190 191 192 193 194 195
DateParser::DateToken DateParser::DateStringTokenizer<CharType>::Scan() {
  int pre_pos = in_->position();
  if (in_->IsEnd()) return DateToken::EndOfInput();
  if (in_->IsAsciiDigit()) {
    int n = in_->ReadUnsignedNumeral();
    int length = in_->position() - pre_pos;
    return DateToken::Number(n, length);
  }
  if (in_->Skip(':')) return DateToken::Symbol(':');
  if (in_->Skip('-')) return DateToken::Symbol('-');
  if (in_->Skip('+')) return DateToken::Symbol('+');
  if (in_->Skip('.')) return DateToken::Symbol('.');
  if (in_->Skip(')')) return DateToken::Symbol(')');
  if (in_->IsAsciiAlphaOrAbove()) {
196
    DCHECK_EQ(KeywordTable::kPrefixLength, 3);
197 198 199 200
    uint32_t buffer[3] = {0, 0, 0};
    int length = in_->ReadWord(buffer, 3);
    int index = KeywordTable::Lookup(buffer, length);
    return DateToken::Keyword(KeywordTable::GetType(index),
Yang Guo's avatar
Yang Guo committed
201
                              KeywordTable::GetValue(index), length);
202 203 204 205 206 207 208 209 210 211 212
  }
  if (in_->SkipWhiteSpace()) {
    return DateToken::WhiteSpace(in_->position() - pre_pos);
  }
  if (in_->SkipParentheses()) {
    return DateToken::Unknown();
  }
  in_->Next();
  return DateToken::Unknown();
}

213 214
// If the current character is white space or a line terminator, advances
// past that one character and returns true. Otherwise the reader is left
// untouched and false is returned.
template <typename Char>
bool DateParser::InputReader<Char>::SkipWhiteSpace() {
  if (!IsWhiteSpaceOrLineTerminator(ch_)) return false;
  Next();
  return true;
}

// If the current character is '(', skips forward past the matching ')',
// taking nested parentheses into account, and returns true. Skipping also
// stops when the current character becomes zero, so an unbalanced '(' does
// not loop forever. Returns false without consuming anything when the
// current character is not '('.
template <typename Char>
bool DateParser::InputReader<Char>::SkipParentheses() {
  if (ch_ != '(') return false;

  // Consume the opening parenthesis; we are now one level deep.
  Next();
  int depth = 1;
  while (depth > 0 && ch_ != 0) {
    if (ch_ == '(') {
      ++depth;
    } else if (ch_ == ')') {
      --depth;
    }
    Next();
  }
  return true;
}

236
template <typename Char>
237 238
DateParser::DateToken DateParser::ParseES5DateTime(
    DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
239
    TimeZoneComposer* tz) {
240 241 242
  DCHECK(day->IsEmpty());
  DCHECK(time->IsEmpty());
  DCHECK(tz->IsEmpty());
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260

  // Parse mandatory date string: [('-'|'+')yy]yyyy[':'MM[':'DD]]
  if (scanner->Peek().IsAsciiSign()) {
    // Keep the sign token, so we can pass it back to the legacy
    // parser if we don't use it.
    DateToken sign_token = scanner->Next();
    if (!scanner->Peek().IsFixedLengthNumber(6)) return sign_token;
    int sign = sign_token.ascii_sign();
    int year = scanner->Next().number();
    if (sign < 0 && year == 0) return sign_token;
    day->Add(sign * year);
  } else if (scanner->Peek().IsFixedLengthNumber(4)) {
    day->Add(scanner->Next().number());
  } else {
    return scanner->Next();
  }
  if (scanner->SkipSymbol('-')) {
    if (!scanner->Peek().IsFixedLengthNumber(2) ||
Yang Guo's avatar
Yang Guo committed
261 262
        !DayComposer::IsMonth(scanner->Peek().number()))
      return scanner->Next();
263 264 265
    day->Add(scanner->Next().number());
    if (scanner->SkipSymbol('-')) {
      if (!scanner->Peek().IsFixedLengthNumber(2) ||
Yang Guo's avatar
Yang Guo committed
266 267
          !DayComposer::IsDay(scanner->Peek().number()))
        return scanner->Next();
268 269 270 271 272 273 274
      day->Add(scanner->Next().number());
    }
  }
  // Check for optional time string: 'T'HH':'mm[':'ss['.'sss]]Z
  if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) {
    if (!scanner->Peek().IsEndOfInput()) return scanner->Next();
  } else {
275
    // ES5 Date Time String time part is present.
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
    scanner->Next();
    if (!scanner->Peek().IsFixedLengthNumber(2) ||
        !Between(scanner->Peek().number(), 0, 24)) {
      return DateToken::Invalid();
    }
    // Allow 24:00[:00[.000]], but no other time starting with 24.
    bool hour_is_24 = (scanner->Peek().number() == 24);
    time->Add(scanner->Next().number());
    if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
    if (!scanner->Peek().IsFixedLengthNumber(2) ||
        !TimeComposer::IsMinute(scanner->Peek().number()) ||
        (hour_is_24 && scanner->Peek().number() > 0)) {
      return DateToken::Invalid();
    }
    time->Add(scanner->Next().number());
    if (scanner->SkipSymbol(':')) {
      if (!scanner->Peek().IsFixedLengthNumber(2) ||
          !TimeComposer::IsSecond(scanner->Peek().number()) ||
          (hour_is_24 && scanner->Peek().number() > 0)) {
        return DateToken::Invalid();
      }
      time->Add(scanner->Next().number());
      if (scanner->SkipSymbol('.')) {
        if (!scanner->Peek().IsNumber() ||
            (hour_is_24 && scanner->Peek().number() > 0)) {
          return DateToken::Invalid();
        }
        // Allow more or less than the mandated three digits.
        time->Add(ReadMilliseconds(scanner->Next()));
      }
    }
    // Check for optional timezone designation: 'Z' | ('+'|'-')hh':'mm
    if (scanner->Peek().IsKeywordZ()) {
      scanner->Next();
      tz->Set(0);
Yang Guo's avatar
Yang Guo committed
311
    } else if (scanner->Peek().IsSymbol('+') || scanner->Peek().IsSymbol('-')) {
312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
      tz->SetSign(scanner->Next().symbol() == '+' ? 1 : -1);
      if (scanner->Peek().IsFixedLengthNumber(4)) {
        // hhmm extension syntax.
        int hourmin = scanner->Next().number();
        int hour = hourmin / 100;
        int min = hourmin % 100;
        if (!TimeComposer::IsHour(hour) || !TimeComposer::IsMinute(min)) {
          return DateToken::Invalid();
        }
        tz->SetAbsoluteHour(hour);
        tz->SetAbsoluteMinute(min);
      } else {
        // hh:mm standard syntax.
        if (!scanner->Peek().IsFixedLengthNumber(2) ||
            !TimeComposer::IsHour(scanner->Peek().number())) {
          return DateToken::Invalid();
        }
        tz->SetAbsoluteHour(scanner->Next().number());
        if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
        if (!scanner->Peek().IsFixedLengthNumber(2) ||
            !TimeComposer::IsMinute(scanner->Peek().number())) {
          return DateToken::Invalid();
        }
        tz->SetAbsoluteMinute(scanner->Next().number());
      }
    }
    if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid();
  }
340 341 342 343 344 345 346
  // Successfully parsed ES5 Date Time String.
  // ES#sec-date-time-string-format Date Time String Format
  // "When the time zone offset is absent, date-only forms are interpreted
  //  as a UTC time and date-time forms are interpreted as a local time."
  if (tz->IsEmpty() && time->IsEmpty()) {
    tz->Set(0);
  }
347 348 349 350
  day->set_iso_date();
  return DateToken::EndOfInput();
}

351 352
}  // namespace internal
}  // namespace v8
353

Yang Guo's avatar
Yang Guo committed
354
#endif  // V8_DATE_DATEPARSER_INL_H_