// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_ASMJS_ASM_SCANNER_H_
#define V8_ASMJS_ASM_SCANNER_H_

#include <memory>
#include <string>
#include <unordered_map>

#include "src/asmjs/asm-names.h"
#include "src/base/logging.h"
#include "src/common/globals.h"

namespace v8 {
namespace internal {

// Forward declaration only: this header refers to the stream solely through a
// pointer, so the complete type is not required here.
class Utf16CharacterStream;

// A custom scanner to extract the token stream needed to parse valid
// asm.js: http://asmjs.org/spec/latest/
// This scanner intentionally avoids the portions of JavaScript lexing
// that are not required to determine if code is valid asm.js code.
// * Strings are disallowed except for 'use asm'.
// * Only the subset of keywords needed to check asm.js invariants is
//   included.
// * Identifiers are accumulated into local + global string tables
//   (for performance).
class V8_EXPORT_PRIVATE AsmJsScanner {
 public:
32
  using token_t = int32_t;
33

34
  explicit AsmJsScanner(Utf16CharacterStream* stream);
35 36 37

  // Get current token.
  token_t Token() const { return token_; }
38 39
  // Get position of current token.
  size_t Position() const { return position_; }
40 41 42 43
  // Advance to the next token.
  void Next();
  // Back up by one token.
  void Rewind();
44 45 46

  // Get raw string for current identifier. Note that the returned string will
  // become invalid when the scanner advances, create a copy to preserve it.
47 48 49 50 51
  const std::string& GetIdentifierString() const {
    // Identifier strings don't work after a rewind.
    DCHECK(!rewind_);
    return identifier_string_;
  }
52

53 54 55 56 57 58 59 60 61 62 63 64 65
  // Check if we just passed a newline.
  bool IsPrecededByNewline() const {
    // Newline tracking doesn't work if you back up.
    DCHECK(!rewind_);
    return preceded_by_newline_;
  }

#if DEBUG
  // Debug only method to go from a token back to its name.
  // Slow, only use for debugging.
  std::string Name(token_t token) const;
#endif

66 67 68
  // Restores old position (token after that position). Note that it is not
  // allowed to rewind right after a seek, because previous tokens are unknown.
  void Seek(size_t pos);
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92

  // Select whether identifiers are resolved in global or local scope,
  // and which scope new identifiers are added to.
  void EnterLocalScope() { in_local_scope_ = true; }
  void EnterGlobalScope() { in_local_scope_ = false; }
  // Drop all current local identifiers.
  void ResetLocals();

  // Methods to check if a token is an identifier and which scope.
  bool IsLocal() const { return IsLocal(Token()); }
  bool IsGlobal() const { return IsGlobal(Token()); }
  static bool IsLocal(token_t token) { return token <= kLocalsStart; }
  static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
  // Methods to find the index position of an identifier (count starting from
  // 0 for each scope separately).
  static size_t LocalIndex(token_t token) {
    DCHECK(IsLocal(token));
    return -(token - kLocalsStart);
  }
  static size_t GlobalIndex(token_t token) {
    DCHECK(IsGlobal(token));
    return token - kGlobalsStart;
  }

93 94 95
  // Methods to check if the current token is a numeric literal considered an
  // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
  // that numbers without a dot outside the [0 .. 2^32) range are errors.
96
  bool IsUnsigned() const { return Token() == kUnsigned; }
97 98 99 100
  uint32_t AsUnsigned() const {
    DCHECK(IsUnsigned());
    return unsigned_value_;
  }
101
  bool IsDouble() const { return Token() == kDouble; }
102 103 104 105
  double AsDouble() const {
    DCHECK(IsDouble());
    return double_value_;
  }
106 107 108

  // clang-format off
  enum {
109 110
    // [-10000-kMaxIdentifierCount, -10000)    :: Local identifiers (counting
    //                                            backwards)
111 112 113 114 115 116 117 118 119 120
    // [-10000 .. -1)                          :: Builtin tokens like keywords
    //                                            (also includes some special
    //                                             ones like end of input)
    // 0        .. 255                         :: Single char tokens
    // 256      .. 256+kMaxIdentifierCount     :: Global identifiers
    kLocalsStart = -10000,
#define V(name, _junk1, _junk2, _junk3) kToken_##name,
    STDLIB_MATH_FUNCTION_LIST(V)
    STDLIB_ARRAY_TYPE_LIST(V)
#undef V
121 122 123
#define V(name, _junk1) kToken_##name,
    STDLIB_MATH_VALUE_LIST(V)
#undef V
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
#define V(name) kToken_##name,
    STDLIB_OTHER_LIST(V)
    KEYWORD_NAME_LIST(V)
#undef V
#define V(rawname, name) kToken_##name,
    LONG_SYMBOL_NAME_LIST(V)
#undef V
#define V(name, value, string_name) name = value,
    SPECIAL_TOKEN_LIST(V)
#undef V
    kGlobalsStart = 256,
  };
  // clang-format on

 private:
139
  Utf16CharacterStream* stream_;
140 141
  token_t token_;
  token_t preceding_token_;
142 143 144 145
  token_t next_token_;         // Only set when in {rewind} state.
  size_t position_;            // Corresponds to {token} position.
  size_t preceding_position_;  // Corresponds to {preceding_token} position.
  size_t next_position_;       // Only set when in {rewind} state.
146 147 148 149 150 151 152 153
  bool rewind_;
  std::string identifier_string_;
  bool in_local_scope_;
  std::unordered_map<std::string, token_t> local_names_;
  std::unordered_map<std::string, token_t> global_names_;
  std::unordered_map<std::string, token_t> property_names_;
  int global_count_;
  double double_value_;
154
  uint32_t unsigned_value_;
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
  bool preceded_by_newline_;

  // Consume multiple characters.
  void ConsumeIdentifier(uc32 ch);
  void ConsumeNumber(uc32 ch);
  bool ConsumeCComment();
  void ConsumeCPPComment();
  void ConsumeString(uc32 quote);
  void ConsumeCompareOrShift(uc32 ch);

  // Classify character categories.
  bool IsIdentifierStart(uc32 ch);
  bool IsIdentifierPart(uc32 ch);
  bool IsNumberStart(uc32 ch);
};

}  // namespace internal
}  // namespace v8

#endif  // V8_ASMJS_ASM_SCANNER_H_