// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5 6
#ifndef V8_PARSING_TOKEN_H_
#define V8_PARSING_TOKEN_H_
7

8
#include "src/base/logging.h"
9
#include "src/globals.h"
10

11 12
namespace v8 {
namespace internal {
13 14 15 16

// TOKEN_LIST takes a list of 3 macros M, all of which satisfy the
// same signature M(name, string, precedence), where name is the
// symbolic token name, string is the corresponding syntactic symbol
// (or nullptr, for literals), and precedence is the precedence (or 0).
// The parameters are invoked for token categories as follows:
//
//   T: Non-keyword tokens
//   K: Keyword tokens
//   C: Contextual keyword tokens
//
// Contextual keyword tokens are tokens that are scanned as Token::IDENTIFIER,
// but that in some contexts are treated as keywords. This mostly happens
// when ECMAScript introduces new keywords, but for backwards compatibility
// allows them to still be used as identifiers in most contexts.

// IGNORE_TOKEN is a convenience macro that can be supplied as
// an argument (at any position) for a TOKEN_LIST call. It accepts the
// same (name, string, precedence) arguments as the other list macros
// but expands to nothing, so tokens belonging to the respective
// category contribute no text to the expansion.

#define IGNORE_TOKEN(name, string, precedence)

35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
#define TOKEN_LIST(T, K, C)                                        \
  /* End of source indicator. */                                   \
  T(EOS, "EOS", 0)                                                 \
                                                                   \
  /* Punctuators (ECMA-262, section 7.7, page 15). */              \
  T(LPAREN, "(", 0)                                                \
  T(RPAREN, ")", 0)                                                \
  T(LBRACK, "[", 0)                                                \
  T(RBRACK, "]", 0)                                                \
  T(LBRACE, "{", 0)                                                \
  T(RBRACE, "}", 0)                                                \
  T(COLON, ":", 0)                                                 \
  T(SEMICOLON, ";", 0)                                             \
  T(PERIOD, ".", 0)                                                \
  T(ELLIPSIS, "...", 0)                                            \
  T(CONDITIONAL, "?", 3)                                           \
  T(INC, "++", 0)                                                  \
  T(DEC, "--", 0)                                                  \
  T(ARROW, "=>", 0)                                                \
                                                                   \
  /* Assignment operators. */                                      \
  /* IsAssignmentOp() relies on this block of enum values being */ \
  /* contiguous and sorted in the same order! */                   \
  T(INIT, "=init", 2) /* AST-use only. */                          \
  T(ASSIGN, "=", 2)                                                \
  T(ASSIGN_BIT_OR, "|=", 2)                                        \
  T(ASSIGN_BIT_XOR, "^=", 2)                                       \
  T(ASSIGN_BIT_AND, "&=", 2)                                       \
  T(ASSIGN_SHL, "<<=", 2)                                          \
  T(ASSIGN_SAR, ">>=", 2)                                          \
  T(ASSIGN_SHR, ">>>=", 2)                                         \
  T(ASSIGN_ADD, "+=", 2)                                           \
  T(ASSIGN_SUB, "-=", 2)                                           \
  T(ASSIGN_MUL, "*=", 2)                                           \
  T(ASSIGN_DIV, "/=", 2)                                           \
  T(ASSIGN_MOD, "%=", 2)                                           \
  T(ASSIGN_EXP, "**=", 2)                                          \
                                                                   \
  /* Binary operators sorted by precedence. */                     \
  /* IsBinaryOp() relies on this block of enum values */           \
  /* being contiguous and sorted in the same order! */             \
  /* IsTruncatingBinaryOp(), IsBitOp() and IsShiftOp() test */     \
  /* sub-ranges of this block (BIT_OR..SHR and SHL..SHR). */       \
  T(COMMA, ",", 1)                                                 \
  T(OR, "||", 4)                                                   \
  T(AND, "&&", 5)                                                  \
  T(BIT_OR, "|", 6)                                                \
  T(BIT_XOR, "^", 7)                                               \
  T(BIT_AND, "&", 8)                                               \
  T(SHL, "<<", 11)                                                 \
  T(SAR, ">>", 11)                                                 \
  T(SHR, ">>>", 11)                                                \
  T(ADD, "+", 12)                                                  \
  T(SUB, "-", 12)                                                  \
  T(MUL, "*", 13)                                                  \
  T(DIV, "/", 13)                                                  \
  T(MOD, "%", 13)                                                  \
  T(EXP, "**", 14)                                                 \
                                                                   \
  /* Compare operators sorted by precedence. */                    \
  /* IsCompareOp() relies on this block of enum values */          \
  /* being contiguous and sorted in the same order! */             \
  T(EQ, "==", 9)                                                   \
  T(NE, "!=", 9)                                                   \
  T(EQ_STRICT, "===", 9)                                           \
  T(NE_STRICT, "!==", 9)                                           \
  T(LT, "<", 10)                                                   \
  T(GT, ">", 10)                                                   \
  T(LTE, "<=", 10)                                                 \
  T(GTE, ">=", 10)                                                 \
  K(INSTANCEOF, "instanceof", 10)                                  \
  K(IN, "in", 10)                                                  \
                                                                   \
  /* Unary operators. */                                           \
  /* IsUnaryOp() relies on this block of enum values */            \
  /* being contiguous and sorted in the same order! */             \
  T(NOT, "!", 0)                                                   \
  T(BIT_NOT, "~", 0)                                               \
  K(DELETE, "delete", 0)                                           \
  K(TYPEOF, "typeof", 0)                                           \
  K(VOID, "void", 0)                                               \
                                                                   \
  /* Keywords (ECMA-262, section 7.5.2, page 13). */               \
  K(BREAK, "break", 0)                                             \
  K(CASE, "case", 0)                                               \
  K(CATCH, "catch", 0)                                             \
  K(CONTINUE, "continue", 0)                                       \
  K(DEBUGGER, "debugger", 0)                                       \
  K(DEFAULT, "default", 0)                                         \
  /* DELETE */                                                     \
  K(DO, "do", 0)                                                   \
  K(ELSE, "else", 0)                                               \
  K(FINALLY, "finally", 0)                                         \
  K(FOR, "for", 0)                                                 \
  K(FUNCTION, "function", 0)                                       \
  K(IF, "if", 0)                                                   \
  /* IN */                                                         \
  /* INSTANCEOF */                                                 \
  K(NEW, "new", 0)                                                 \
  K(RETURN, "return", 0)                                           \
  K(SWITCH, "switch", 0)                                           \
  K(THIS, "this", 0)                                               \
  K(THROW, "throw", 0)                                             \
  K(TRY, "try", 0)                                                 \
  /* TYPEOF */                                                     \
  K(VAR, "var", 0)                                                 \
  /* VOID */                                                       \
  K(WHILE, "while", 0)                                             \
  K(WITH, "with", 0)                                               \
                                                                   \
  /* Literals (ECMA-262, section 7.8, page 16). */                 \
  K(NULL_LITERAL, "null", 0)                                       \
  K(TRUE_LITERAL, "true", 0)                                       \
  K(FALSE_LITERAL, "false", 0)                                     \
  T(NUMBER, nullptr, 0)                                            \
  T(SMI, nullptr, 0)                                               \
  T(STRING, nullptr, 0)                                            \
  T(BIGINT, nullptr, 0)                                            \
                                                                   \
  /* Identifiers (not keywords or future reserved words). */       \
  T(IDENTIFIER, nullptr, 0)                                        \
                                                                   \
  /* Future reserved words (ECMA-262, section 7.6.1.2). */         \
  T(FUTURE_STRICT_RESERVED_WORD, nullptr, 0)                       \
  K(ASYNC, "async", 0)                                             \
  /* `await` is a reserved word in module code only */             \
  K(AWAIT, "await", 0)                                             \
  K(CLASS, "class", 0)                                             \
  K(CONST, "const", 0)                                             \
  K(ENUM, "enum", 0)                                               \
  K(EXPORT, "export", 0)                                           \
  K(EXTENDS, "extends", 0)                                         \
  K(IMPORT, "import", 0)                                           \
  K(LET, "let", 0)                                                 \
  K(STATIC, "static", 0)                                           \
  K(YIELD, "yield", 0)                                             \
  K(SUPER, "super", 0)                                             \
                                                                   \
  /* Illegal token - not able to scan. */                          \
  T(ILLEGAL, "ILLEGAL", 0)                                         \
  T(ESCAPED_KEYWORD, nullptr, 0)                                   \
  T(ESCAPED_STRICT_RESERVED_WORD, nullptr, 0)                      \
                                                                   \
  /* Scanner-internal use only. */                                 \
  T(WHITESPACE, nullptr, 0)                                        \
  T(UNINITIALIZED, nullptr, 0)                                     \
  T(REGEXP_LITERAL, nullptr, 0)                                    \
                                                                   \
  /* ES6 Template Literals */                                      \
  T(TEMPLATE_SPAN, nullptr, 0)                                     \
  T(TEMPLATE_TAIL, nullptr, 0)                                     \
                                                                   \
  /* Contextual keyword tokens */                                  \
  C(GET, "get", 0)                                                 \
  C(SET, "set", 0)                                                 \
  C(OF, "of", 0)                                                   \
  C(TARGET, "target", 0)                                           \
  C(SENT, "sent", 0)                                               \
  C(META, "meta", 0)                                               \
  C(AS, "as", 0)                                                   \
  C(FROM, "from", 0)                                               \
  C(NAME, "name", 0)                                               \
  C(PROTO_UNDERSCORED, "__proto__", 0)                             \
  C(CONSTRUCTOR, "constructor", 0)                                 \
  C(PROTOTYPE, "prototype", 0)                                     \
  C(EVAL, "eval", 0)                                               \
  C(ARGUMENTS, "arguments", 0)                                     \
  C(UNDEFINED, "undefined", 0)                                     \
  C(ANONYMOUS, "anonymous", 0)
202 203 204 205 206

class Token {
 public:
  // All token values.
#define T(name, string, precedence) name,
207
  enum Value { TOKEN_LIST(T, T, T) NUM_TOKENS };
208 209 210 211 212
#undef T

  // Returns a string corresponding to the C++ token name
  // (e.g. "LT" for the token LT).
  static const char* Name(Value tok) {
213
    DCHECK(tok < NUM_TOKENS);  // tok is unsigned
214 215 216 217
    return name_[tok];
  }

  // Predicates
218 219 220
  static bool IsKeyword(Value tok) {
    return token_type[tok] == 'K';
  }
221
  static bool IsContextualKeyword(Value tok) { return token_type[tok] == 'C'; }
222

223
  static bool IsIdentifier(Value tok, LanguageMode language_mode,
224
                           bool is_generator, bool disallow_await) {
225 226
    switch (tok) {
      case IDENTIFIER:
227
      case ASYNC:
228
        return true;
229
      case ESCAPED_STRICT_RESERVED_WORD:
230 231 232
      case FUTURE_STRICT_RESERVED_WORD:
      case LET:
      case STATIC:
233
        return is_sloppy(language_mode);
234
      case YIELD:
235
        return !is_generator && is_sloppy(language_mode);
236
      case AWAIT:
237
        return !disallow_await;
238 239 240 241 242 243
      default:
        return false;
    }
    UNREACHABLE();
  }

244
  static bool IsAssignmentOp(Value tok) {
245
    return INIT <= tok && tok <= ASSIGN_EXP;
246 247
  }

248
  static bool IsBinaryOp(Value op) { return COMMA <= op && op <= EXP; }
249

250
  static bool IsTruncatingBinaryOp(Value op) {
251
    return BIT_OR <= op && op <= SHR;
252 253
  }

254 255 256 257
  static bool IsCompareOp(Value op) {
    return EQ <= op && op <= IN;
  }

258
  static bool IsOrderedRelationalCompareOp(Value op) {
259 260 261
    return op == LT || op == LTE || op == GT || op == GTE;
  }

262 263 264 265
  static bool IsEqualityOp(Value op) {
    return op == EQ || op == EQ_STRICT;
  }

266 267 268 269 270 271 272 273 274
  static bool IsInequalityOp(Value op) {
    return op == NE || op == NE_STRICT;
  }

  static bool IsArithmeticCompareOp(Value op) {
    return IsOrderedRelationalCompareOp(op) ||
        IsEqualityOp(op) || IsInequalityOp(op);
  }

275
  static Value NegateCompareOp(Value op) {
276
    DCHECK(IsArithmeticCompareOp(op));
277 278 279 280
    switch (op) {
      case EQ: return NE;
      case NE: return EQ;
      case EQ_STRICT: return NE_STRICT;
281
      case NE_STRICT: return EQ_STRICT;
282 283 284 285 286
      case LT: return GTE;
      case GT: return LTE;
      case LTE: return GT;
      case GTE: return LT;
      default:
287
        UNREACHABLE();
288 289 290
    }
  }

291
  static Value ReverseCompareOp(Value op) {
292
    DCHECK(IsArithmeticCompareOp(op));
293
    switch (op) {
294 295 296 297
      case EQ: return EQ;
      case NE: return NE;
      case EQ_STRICT: return EQ_STRICT;
      case NE_STRICT: return NE_STRICT;
298 299 300 301 302
      case LT: return GT;
      case GT: return LT;
      case LTE: return GTE;
      case GTE: return LTE;
      default:
303
        UNREACHABLE();
304 305 306
    }
  }

307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
  static bool EvalComparison(Value op, double op1, double op2) {
    DCHECK(IsArithmeticCompareOp(op));
    switch (op) {
      case Token::EQ:
      case Token::EQ_STRICT: return (op1 == op2);
      case Token::NE: return (op1 != op2);
      case Token::LT: return (op1 < op2);
      case Token::GT: return (op1 > op2);
      case Token::LTE: return (op1 <= op2);
      case Token::GTE: return (op1 >= op2);
      default:
        UNREACHABLE();
    }
  }

322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
  static Value BinaryOpForAssignment(Value op) {
    DCHECK(IsAssignmentOp(op));
    switch (op) {
      case Token::ASSIGN_BIT_OR:
        return Token::BIT_OR;
      case Token::ASSIGN_BIT_XOR:
        return Token::BIT_XOR;
      case Token::ASSIGN_BIT_AND:
        return Token::BIT_AND;
      case Token::ASSIGN_SHL:
        return Token::SHL;
      case Token::ASSIGN_SAR:
        return Token::SAR;
      case Token::ASSIGN_SHR:
        return Token::SHR;
      case Token::ASSIGN_ADD:
        return Token::ADD;
      case Token::ASSIGN_SUB:
        return Token::SUB;
      case Token::ASSIGN_MUL:
        return Token::MUL;
      case Token::ASSIGN_DIV:
        return Token::DIV;
      case Token::ASSIGN_MOD:
        return Token::MOD;
      default:
        UNREACHABLE();
    }
  }

352 353 354 355 356 357 358 359 360 361 362 363
  static bool IsBitOp(Value op) {
    return (BIT_OR <= op && op <= SHR) || op == BIT_NOT;
  }

  static bool IsUnaryOp(Value op) {
    return (NOT <= op && op <= VOID) || op == ADD || op == SUB;
  }

  static bool IsCountOp(Value op) {
    return op == INC || op == DEC;
  }

364 365 366 367
  static bool IsShiftOp(Value op) {
    return (SHL <= op) && (op <= SHR);
  }

368
  // Returns a string corresponding to the JS token string
369
  // (.e., "<" for the token LT) or nullptr if the token doesn't
370 371
  // have a (unique) string (e.g. an IDENTIFIER).
  static const char* String(Value tok) {
372
    DCHECK(tok < NUM_TOKENS);  // tok is unsigned.
373 374 375
    return string_[tok];
  }

376 377 378 379 380
  static uint8_t StringLength(Value tok) {
    DCHECK(tok < NUM_TOKENS);
    return string_length_[tok];
  }

381 382 383
  // Returns the precedence > 0 for binary and compare
  // operators; returns 0 otherwise.
  static int Precedence(Value tok) {
384
    DCHECK(tok < NUM_TOKENS);  // tok is unsigned.
385 386 387 388
    return precedence_[tok];
  }

 private:
389 390
  static const char* const name_[NUM_TOKENS];
  static const char* const string_[NUM_TOKENS];
391
  static const uint8_t string_length_[NUM_TOKENS];
392
  static const int8_t precedence_[NUM_TOKENS];
393
  static const char token_type[NUM_TOKENS];
394 395
};

396 397
}  // namespace internal
}  // namespace v8
398

399
#endif  // V8_PARSING_TOKEN_H_