// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_DATE_DATEPARSER_H_
#define V8_DATE_DATEPARSER_H_

#include "src/allocation.h"
#include "src/char-predicates.h"

namespace v8 {
namespace internal {
class DateParser : public AllStatic {
 public:
  // Parse the string as a date. If parsing succeeds, return true after
  // filling out the output array as follows (all integers are Smis):
  // [0]: year
  // [1]: month (0 = Jan, 1 = Feb, ...)
  // [2]: day
  // [3]: hour
  // [4]: minute
  // [5]: second
24 25
  // [6]: millisecond
  // [7]: UTC offset in seconds, or null value if no timezone specified
26
  // If parsing fails, return false (content of output array is not defined).
27
  template <typename Char>
28
  static bool Parse(Isolate* isolate, Vector<Char> str, FixedArray output);
  // Indices of the slots in the output array filled in by Parse().
  // OUTPUT_SIZE is the number of slots, not a slot of its own.
  enum {
    YEAR,
    MONTH,
    DAY,
    HOUR,
    MINUTE,
    SECOND,
    MILLISECOND,
    UTC_OFFSET,
    OUTPUT_SIZE
  };
 private:
  // Range testing.
  // Returns true iff lo <= x <= hi; requires lo <= hi. Both bounds are
  // checked with a single unsigned comparison: when x < lo the difference
  // wraps around to a huge unsigned value and so fails the test. The
  // subtractions are performed on unsigned operands, so that no value of x
  // can trigger signed-overflow undefined behaviour.
  static inline bool Between(int x, int lo, int hi) {
    const unsigned offset =
        static_cast<unsigned>(x) - static_cast<unsigned>(lo);
    const unsigned width =
        static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
    return offset <= width;
  }
  // Indicates a missing value.
  static const int kNone = kMaxInt;

  // Maximal number of digits used to build the value of a numeral.
  // Remaining digits are ignored. Nine decimal digits (at most 999999999)
  // still fit into a 32-bit int.
  static const int kMaxSignificantDigits = 9;

55
  // InputReader provides basic string parsing and character classification.
56
  template <typename Char>
57
  class InputReader {
58
   public:
59
    explicit InputReader(Vector<Char> s) : index_(0), buffer_(s) { Next(); }
60

61 62
    int position() { return index_; }

63
    // Advance to the next character of the string.
64 65 66
    void Next() {
      ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
      index_++;
67 68
    }

69 70 71 72
    // Read a string of digits as an unsigned number. Cap value at
    // kMaxSignificantDigits, but skip remaining digits if the numeral
    // is longer.
    int ReadUnsignedNumeral() {
73
      int n = 0;
74 75 76 77 78
      int i = 0;
      while (IsAsciiDigit()) {
        if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
        i++;
        Next();
79 80 81 82
      }
      return n;
    }

83 84 85 86 87 88
    // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
    // lower-case prefix, and pad any remainder of the buffer with zeroes.
    // Return word length.
    int ReadWord(uint32_t* prefix, int prefix_size) {
      int len;
      for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
89
        if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
90 91 92 93 94 95
      }
      for (int i = len; i < prefix_size; i++) prefix[i] = 0;
      return len;
    }

    // The skip methods return whether they actually skipped something.
96 97 98 99 100 101 102
    bool Skip(uint32_t c) {
      if (ch_ == c) {
        Next();
        return true;
      }
      return false;
    }
103

104 105
    inline bool SkipWhiteSpace();
    inline bool SkipParentheses();
106 107 108 109 110 111 112 113 114 115 116 117

    // Character testing/classification. Non-ASCII digits are not supported.
    bool Is(uint32_t c) const { return ch_ == c; }
    bool IsEnd() const { return ch_ == 0; }
    bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
    bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
    bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }

    // Return 1 for '+' and -1 for '-'.
    int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }

   private:
118 119
    int index_;
    Vector<Char> buffer_;
120 121 122
    uint32_t ch_;
  };

123
  // Kinds of keywords a word in a date string can map to. The values double
  // as DateToken tags (see DateToken::Keyword and DateToken::IsKeyword), so
  // they have to stay non-negative.
  enum KeywordType {
    INVALID,
    MONTH_NAME,
    TIME_ZONE_NAME,
    TIME_SEPARATOR,
    AM_PM
  };

  struct DateToken {
   public:
    bool IsInvalid() { return tag_ == kInvalidTokenTag; }
    bool IsUnknown() { return tag_ == kUnknownTokenTag; }
    bool IsNumber() { return tag_ == kNumberTag; }
    bool IsSymbol() { return tag_ == kSymbolTag; }
    bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
    bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
    bool IsKeyword() { return tag_ >= kKeywordTagStart; }

    int length() { return length_; }

    int number() {
144
      DCHECK(IsNumber());
145 146 147
      return value_;
    }
    KeywordType keyword_type() {
148
      DCHECK(IsKeyword());
149 150 151
      return static_cast<KeywordType>(tag_);
    }
    int keyword_value() {
152
      DCHECK(IsKeyword());
153 154 155
      return value_;
    }
    char symbol() {
156
      DCHECK(IsSymbol());
157 158 159 160 161
      return static_cast<char>(value_);
    }
    bool IsSymbol(char symbol) {
      return IsSymbol() && this->symbol() == symbol;
    }
Yang Guo's avatar
Yang Guo committed
162
    bool IsKeywordType(KeywordType tag) { return tag_ == tag; }
163 164 165 166 167 168 169
    bool IsFixedLengthNumber(int length) {
      return IsNumber() && length_ == length;
    }
    bool IsAsciiSign() {
      return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
    }
    int ascii_sign() {
170
      DCHECK(IsAsciiSign());
171 172 173 174 175
      return 44 - value_;
    }
    bool IsKeywordZ() {
      return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
    }
Yang Guo's avatar
Yang Guo committed
176
    bool IsUnknown(int character) { return IsUnknown() && value_ == character; }
177 178 179 180 181 182 183 184 185 186
    // Factory functions.
    static DateToken Keyword(KeywordType tag, int value, int length) {
      return DateToken(tag, length, value);
    }
    static DateToken Number(int value, int length) {
      return DateToken(kNumberTag, length, value);
    }
    static DateToken Symbol(char symbol) {
      return DateToken(kSymbolTag, 1, symbol);
    }
Yang Guo's avatar
Yang Guo committed
187
    static DateToken EndOfInput() { return DateToken(kEndOfInputTag, 0, -1); }
188 189 190
    static DateToken WhiteSpace(int length) {
      return DateToken(kWhiteSpaceTag, length, -1);
    }
Yang Guo's avatar
Yang Guo committed
191 192
    static DateToken Unknown() { return DateToken(kUnknownTokenTag, 1, -1); }
    static DateToken Invalid() { return DateToken(kInvalidTokenTag, 0, -1); }
193

194 195 196 197 198 199 200 201 202 203 204
   private:
    enum TagType {
      kInvalidTokenTag = -6,
      kUnknownTokenTag = -5,
      kWhiteSpaceTag = -4,
      kNumberTag = -3,
      kSymbolTag = -2,
      kEndOfInputTag = -1,
      kKeywordTagStart = 0
    };
    DateToken(int tag, int length, int value)
Yang Guo's avatar
Yang Guo committed
205
        : tag_(tag), length_(length), value_(value) {}
206 207 208 209 210 211 212 213 214 215

    int tag_;
    int length_;  // Number of characters.
    int value_;
  };

  template <typename Char>
  class DateStringTokenizer {
   public:
    explicit DateStringTokenizer(InputReader<Char>* in)
Yang Guo's avatar
Yang Guo committed
216
        : in_(in), next_(Scan()) {}
217 218 219 220 221 222
    DateToken Next() {
      DateToken result = next_;
      next_ = Scan();
      return result;
    }

Yang Guo's avatar
Yang Guo committed
223
    DateToken Peek() { return next_; }
224 225 226 227 228 229 230
    bool SkipSymbol(char symbol) {
      if (next_.IsSymbol(symbol)) {
        next_ = Scan();
        return true;
      }
      return false;
    }
231

232 233 234 235 236 237 238 239
   private:
    DateToken Scan();

    InputReader<Char>* in_;
    DateToken next_;
  };

  // NOTE(review): defined outside this header. Presumably converts a number
  // token into a millisecond value -- confirm against the definition.
  static int ReadMilliseconds(DateToken number);
  // KeywordTable maps names of months, time zones, am/pm to numbers.
  class KeywordTable : public AllStatic {
   public:
    // Layout of one row of |array|: columns [0, kPrefixLength) come first
    // (presumably the keyword prefix matched by Lookup), followed by the
    // keyword type at kTypeOffset and the keyword value at kValueOffset.
    static const int kPrefixLength = 3;
    static const int kTypeOffset = kPrefixLength;
    static const int kValueOffset = kTypeOffset + 1;
    static const int kEntrySize = kValueOffset + 1;
    static const int8_t array[][kEntrySize];

    // Finds a word in the keyword table and returns its index. 'pre' holds
    // a prefix of the word, zero-padded to kPrefixLength entries; 'len' is
    // the length of the whole word.
    static int Lookup(const uint32_t* pre, int len);

    // Type of the keyword stored in row i.
    static KeywordType GetType(int i) {
      const int8_t raw_type = array[i][kTypeOffset];
      return static_cast<KeywordType>(raw_type);
    }

    // Value of the keyword stored in row i.
    static int GetValue(int i) {
      const int8_t* entry = array[i];
      return entry[kValueOffset];
    }
  };

262
  class TimeZoneComposer {
263 264 265 266 267 268 269 270 271 272 273 274 275 276
   public:
    TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
    void Set(int offset_in_hours) {
      sign_ = offset_in_hours < 0 ? -1 : 1;
      hour_ = offset_in_hours * sign_;
      minute_ = 0;
    }
    void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
    void SetAbsoluteHour(int hour) { hour_ = hour; }
    void SetAbsoluteMinute(int minute) { minute_ = minute; }
    bool IsExpecting(int n) const {
      return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
    }
    bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
277
    bool Write(FixedArray output);
278
    bool IsEmpty() { return hour_ == kNone; }
Yang Guo's avatar
Yang Guo committed
279

280 281 282 283 284 285
   private:
    int sign_;
    int hour_;
    int minute_;
  };

286
  class TimeComposer {
287 288 289 290
   public:
    TimeComposer() : index_(0), hour_offset_(kNone) {}
    bool IsEmpty() const { return index_ == 0; }
    bool IsExpecting(int n) const {
Yang Guo's avatar
Yang Guo committed
291
      return (index_ == 1 && IsMinute(n)) || (index_ == 2 && IsSecond(n)) ||
292
             (index_ == 3 && IsMillisecond(n));
293 294 295 296 297 298 299 300 301 302
    }
    bool Add(int n) {
      return index_ < kSize ? (comp_[index_++] = n, true) : false;
    }
    bool AddFinal(int n) {
      if (!Add(n)) return false;
      while (index_ < kSize) comp_[index_++] = 0;
      return true;
    }
    void SetHourOffset(int n) { hour_offset_ = n; }
303
    bool Write(FixedArray output);
304 305 306 307

    static bool IsMinute(int x) { return Between(x, 0, 59); }
    static bool IsHour(int x) { return Between(x, 0, 23); }
    static bool IsSecond(int x) { return Between(x, 0, 59); }
308

309 310
   private:
    static bool IsHour12(int x) { return Between(x, 0, 12); }
311
    static bool IsMillisecond(int x) { return Between(x, 0, 999); }
312

313
    static const int kSize = 4;
314 315 316 317 318
    int comp_[kSize];
    int index_;
    int hour_offset_;
  };

319
  class DayComposer {
320
   public:
321
    DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
322 323
    bool IsEmpty() const { return index_ == 0; }
    bool Add(int n) {
324 325 326 327 328 329
      if (index_ < kSize) {
        comp_[index_] = n;
        index_++;
        return true;
      }
      return false;
330 331
    }
    void SetNamedMonth(int n) { named_month_ = n; }
332
    bool Write(FixedArray output);
333
    void set_iso_date() { is_iso_date_ = true; }
334 335 336
    static bool IsMonth(int x) { return Between(x, 1, 12); }
    static bool IsDay(int x) { return Between(x, 1, 31); }

337
   private:
338 339 340 341
    static const int kSize = 3;
    int comp_[kSize];
    int index_;
    int named_month_;
342 343
    // If set, ensures that data is always parsed in year-month-date order.
    bool is_iso_date_;
344
  };
  // Tries to parse an ES5 Date Time String. Returns the next token
347 348 349 350 351
  // to continue with in the legacy date string parser. If parsing is
  // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
  // returns DateToken::Invalid(). Otherwise parsing continues in the
  // legacy parser.
  template <typename Char>
352 353
  static DateParser::DateToken ParseES5DateTime(
      DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
354
      TimeZoneComposer* tz);
};
}  // namespace internal
}  // namespace v8
#endif  // V8_DATE_DATEPARSER_H_