asm-scanner.h 5.88 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_ASMJS_ASM_SCANNER_H_
#define V8_ASMJS_ASM_SCANNER_H_

#include <memory>
#include <string>
#include <unordered_map>

#include "src/asmjs/asm-names.h"
#include "src/base/logging.h"
14
#include "src/base/strings.h"
15
#include "src/common/globals.h"
16 17 18 19

namespace v8 {
namespace internal {

20
class Utf16CharacterStream;
21 22 23 24 25 26 27 28 29 30 31 32

// A custom scanner to extract the token stream needed to parse valid
// asm.js: http://asmjs.org/spec/latest/
// This scanner intentionally avoids the portions of JavaScript lexing
// that are not required to determine if code is valid asm.js code.
// * Strings are disallowed except for 'use asm'.
// * Only the subset of keywords needed to check asm.js invariants is
//   included.
// * Identifiers are accumulated into local + global string tables
//   (for performance).
class V8_EXPORT_PRIVATE AsmJsScanner {
 public:
33
  using token_t = int32_t;
34

35
  explicit AsmJsScanner(Utf16CharacterStream* stream);
36 37 38

  // Returns the token the scanner is currently positioned on. Pure accessor:
  // the scanner state is not modified.
  token_t Token() const {
    return token_;
  }
39 40
  // Returns the position recorded for the current token. Pure accessor: the
  // scanner state is not modified.
  size_t Position() const {
    return position_;
  }
41 42 43 44
  // Advance to the next token.
  void Next();
  // Back up by one token.
  void Rewind();
45 46 47

  // Get raw string for current identifier. Note that the returned string will
  // become invalid when the scanner advances; create a copy to preserve it.
48 49 50 51 52
  const std::string& GetIdentifierString() const {
    // The identifier text is not usable once the scanner has been rewound,
    // so calling this in the rewound state is a usage error.
    DCHECK(!rewind_);
    // Hand out a reference to the scanner's own buffer; no copy is made.
    const std::string& raw_text = identifier_string_;
    return raw_text;
  }
53

54 55 56 57 58 59 60 61 62 63 64 65 66
  // Reports whether a newline was passed just before the current position,
  // as recorded in preceded_by_newline_.
  bool IsPrecededByNewline() const {
    // Backing up invalidates the newline bookkeeping, so this must not be
    // queried in the rewound state.
    DCHECK(!rewind_);
    const bool saw_newline = preceded_by_newline_;
    return saw_newline;
  }

#if DEBUG
  // Debug only method to go from a token back to its name.
  // Slow, only use for debugging.
  std::string Name(token_t token) const;
#endif

67 68 69
  // Restores old position (token after that position). Note that it is not
  // allowed to rewind right after a seek, because previous tokens are unknown.
  void Seek(size_t pos);
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93

  // Scope selection for identifier handling: chooses whether identifiers are
  // resolved in the local or the global scope, and which of the two scopes
  // newly seen identifiers are added to.
  void EnterLocalScope() {
    in_local_scope_ = true;
  }
  void EnterGlobalScope() {
    in_local_scope_ = false;
  }
  // Drop all current local identifiers.
  void ResetLocals();

  // Methods to check if a token is an identifier and which scope.
  bool IsLocal() const { return IsLocal(Token()); }
  bool IsGlobal() const { return IsGlobal(Token()); }
  static bool IsLocal(token_t token) { return token <= kLocalsStart; }
  static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
  // Methods to find the index position of an identifier (count starting from
  // 0 for each scope separately).
  static size_t LocalIndex(token_t token) {
    // Only local identifier tokens have a local index.
    DCHECK(IsLocal(token));
    // Local tokens count downwards from kLocalsStart, so the (non-positive)
    // distance from kLocalsStart is negated to obtain a zero-based index.
    const auto distance = token - kLocalsStart;
    return -distance;
  }
  static size_t GlobalIndex(token_t token) {
    // Only global identifier tokens have a global index.
    DCHECK(IsGlobal(token));
    // Global tokens count upwards from kGlobalsStart, so the distance from
    // kGlobalsStart is already the zero-based index.
    const auto distance = token - kGlobalsStart;
    return distance;
  }

94 95 96
  // Methods to check if the current token is a numeric literal considered an
  // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
  // that numbers without a dot outside the [0 .. 2^32) range are errors.
97
  bool IsUnsigned() const { return Token() == kUnsigned; }
98 99 100 101
  uint32_t AsUnsigned() const {
    // Only meaningful while the current token is an "unsigned" literal.
    DCHECK(IsUnsigned());
    const uint32_t literal = unsigned_value_;
    return literal;
  }
102
  bool IsDouble() const { return Token() == kDouble; }
103 104 105 106
  double AsDouble() const {
    // Only meaningful while the current token is a "double" literal.
    DCHECK(IsDouble());
    const double literal = double_value_;
    return literal;
  }
107 108 109

  // clang-format off
  enum {
110 111
    // [-10000-kMaxIdentifierCount, -10000]    :: Local identifiers (counting
    //                                            backwards)
112 113 114 115 116 117 118 119 120 121
    // (-10000 .. -1)                          :: Builtin tokens like keywords
    //                                            (also includes some special
    //                                             ones like end of input)
    // 0        .. 255                         :: Single char tokens
    // 256      .. 256+kMaxIdentifierCount     :: Global identifiers
    kLocalsStart = -10000,
#define V(name, _junk1, _junk2, _junk3) kToken_##name,
    STDLIB_MATH_FUNCTION_LIST(V)
    STDLIB_ARRAY_TYPE_LIST(V)
#undef V
122 123 124
#define V(name, _junk1) kToken_##name,
    STDLIB_MATH_VALUE_LIST(V)
#undef V
125 126 127 128 129 130 131 132 133 134 135 136 137 138
#define V(name) kToken_##name,
    STDLIB_OTHER_LIST(V)
    KEYWORD_NAME_LIST(V)
#undef V
#define V(rawname, name) kToken_##name,
    LONG_SYMBOL_NAME_LIST(V)
#undef V
#define V(name, value, string_name) name = value,
    SPECIAL_TOKEN_LIST(V)
#undef V
    kGlobalsStart = 256,
  };
  // clang-format on

139 140
  // The kEndOfInput token value converted to base::uc32.
  // NOTE(review): presumably exists so character-level code can compare a
  // uc32 against the end-of-input marker without a signed/unsigned
  // mismatch -- confirm against the scanner implementation.
  static constexpr base::uc32 kEndOfInputU =
      static_cast<base::uc32>(kEndOfInput);
141

142
 private:
143
  Utf16CharacterStream* stream_;
144 145
  token_t token_;
  token_t preceding_token_;
146 147 148 149
  token_t next_token_;         // Only set when in {rewind} state.
  size_t position_;            // Corresponds to {token} position.
  size_t preceding_position_;  // Corresponds to {preceding_token} position.
  size_t next_position_;       // Only set when in {rewind} state.
150 151 152 153 154 155 156 157
  // Rewound-state flag. GetIdentifierString() and IsPrecededByNewline()
  // DCHECK that it is false before returning their values.
  bool rewind_;
  // Backing storage for the reference returned by GetIdentifierString().
  std::string identifier_string_;
  // Set to true by EnterLocalScope() and to false by EnterGlobalScope().
  bool in_local_scope_;
  // Maps from identifier text to token value.
  // NOTE(review): presumably one table each for local identifiers, global
  // identifiers and property names; they are populated outside this header
  // -- confirm against the implementation.
  std::unordered_map<std::string, token_t> local_names_;
  std::unordered_map<std::string, token_t> global_names_;
  std::unordered_map<std::string, token_t> property_names_;
  // NOTE(review): presumably the number of global identifier tokens handed
  // out so far -- confirm against the implementation.
  int global_count_;
  // Value returned by AsDouble().
  double double_value_;
158
  uint32_t unsigned_value_;
159 160 161
  bool preceded_by_newline_;

  // Consume multiple characters.
162 163
  void ConsumeIdentifier(base::uc32 ch);
  void ConsumeNumber(base::uc32 ch);
164 165
  bool ConsumeCComment();
  void ConsumeCPPComment();
166 167
  void ConsumeString(base::uc32 quote);
  void ConsumeCompareOrShift(base::uc32 ch);
168 169

  // Classify character categories.
170 171 172
  bool IsIdentifierStart(base::uc32 ch);
  bool IsIdentifierPart(base::uc32 ch);
  bool IsNumberStart(base::uc32 ch);
173 174 175 176
};

}  // namespace internal
}  // namespace v8
177 178

#endif  // V8_ASMJS_ASM_SCANNER_H_