asm-parser.h 14.6 KB
Newer Older
1 2 3 4 5 6 7
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_ASMJS_ASM_PARSER_H_
#define V8_ASMJS_ASM_PARSER_H_

8
#include <memory>
9 10 11 12
#include <string>

#include "src/asmjs/asm-scanner.h"
#include "src/asmjs/asm-types.h"
13
#include "src/base/enum-set.h"
14
#include "src/utils/vector.h"
15 16 17 18 19
#include "src/wasm/wasm-module-builder.h"
#include "src/zone/zone-containers.h"

namespace v8 {
namespace internal {
20

21
class Utf16CharacterStream;
22

23 24 25 26 27 28 29 30 31 32 33 34 35
namespace wasm {

// A custom parser + validator + wasm converter for asm.js:
// http://asmjs.org/spec/latest/
// This parser intentionally avoids the portion of JavaScript parsing
// that are not required to determine if code is valid asm.js code.
// * It is mostly one pass.
// * It bails out on unexpected input.
// * It assumes strict ordering insofar as permitted by asm.js validation rules.
// * It relies on a custom scanner that provides de-duped identifiers in two
//   scopes (local + module wide).
class AsmJsParser {
 public:
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
  // clang-format off
  enum StandardMember {
    kInfinity,
    kNaN,
#define V(_unused1, name, _unused2, _unused3) kMath##name,
    STDLIB_MATH_FUNCTION_LIST(V)
#undef V
#define V(name, _unused1) kMath##name,
    STDLIB_MATH_VALUE_LIST(V)
#undef V
#define V(name, _unused1, _unused2, _unused3) k##name,
    STDLIB_ARRAY_TYPE_LIST(V)
#undef V
  };
  // clang-format on

52
  using StdlibSet = base::EnumSet<StandardMember, uint64_t>;
53

54 55
  explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
                       Utf16CharacterStream* stream);
56
  bool Run();
57
  const char* failure_message() const { return failure_message_; }
58 59
  int failure_location() const { return failure_location_; }
  WasmModuleBuilder* module_builder() { return module_builder_; }
60
  const StdlibSet* stdlib_uses() const { return &stdlib_uses_; }
61 62 63 64 65 66 67 68 69 70 71 72 73 74

 private:
  // clang-format off
  enum class VarKind {
    kUnused,
    kLocal,
    kGlobal,
    kSpecial,
    kFunction,
    kTable,
    kImportedFunction,
#define V(_unused0, Name, _unused1, _unused2) kMath##Name,
    STDLIB_MATH_FUNCTION_LIST(V)
#undef V
75
#define V(Name, _unused1) kMath##Name,
76 77 78 79 80
    STDLIB_MATH_VALUE_LIST(V)
#undef V
  };
  // clang-format on

81 82 83
  // A single import in asm.js can require multiple imports in wasm, if the
  // function is used with different signatures. {cache} keeps the wasm
  // imports for the single asm.js import of name {function_name}.
84
  struct FunctionImportInfo {
85
    Vector<const char> function_name;
86 87 88 89 90
    ZoneUnorderedMap<FunctionSig, uint32_t> cache;

    // Constructor.
    FunctionImportInfo(Vector<const char> name, Zone* zone)
        : function_name(name), cache(zone) {}
91 92 93
  };

  struct VarInfo {
94 95 96 97 98 99 100 101
    AsmType* type = AsmType::None();
    WasmFunctionBuilder* function_builder = nullptr;
    FunctionImportInfo* import = nullptr;
    uint32_t mask = 0;
    uint32_t index = 0;
    VarKind kind = VarKind::kUnused;
    bool mutable_variable = true;
    bool function_defined = false;
102 103 104
  };

  struct GlobalImport {
105
    Vector<const char> import_name;
106 107
    ValueType value_type;
    VarInfo* var_info;
108 109
  };

110 111 112 113 114 115 116 117 118 119 120 121 122 123
  // Distinguish different kinds of blocks participating in {block_stack}. Each
  // entry on that stack represents one block in the wasm code, and determines
  // which block 'break' and 'continue' target in the current context:
  //  - kRegular: The target of a 'break' (with & without identifier).
  //              Pushed by an IterationStatement and a SwitchStatement.
  //  - kLoop   : The target of a 'continue' (with & without identifier).
  //              Pushed by an IterationStatement.
  //  - kNamed  : The target of a 'break' with a specific identifier.
  //              Pushed by a BlockStatement.
  //  - kOther  : Only used for internal blocks, can never be targeted.
  enum class BlockKind { kRegular, kLoop, kNamed, kOther };

  // One entry in the {block_stack}, see {BlockKind} above for details. Blocks
  // without a label have {kTokenNone} set as their label.
124 125 126 127 128
  struct BlockInfo {
    BlockKind kind;
    AsmJsScanner::token_t label;
  };

129 130 131
  // Helper class to make {TempVariable} safe for nesting.
  class TemporaryVariableScope;

132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
  template <typename T>
  class CachedVectors {
   public:
    explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {}

    Zone* zone() const { return reusable_vectors_.get_allocator().zone(); }

    inline void fill(ZoneVector<T>* vec) {
      if (reusable_vectors_.empty()) return;
      reusable_vectors_.back().swap(*vec);
      reusable_vectors_.pop_back();
      vec->clear();
    }

    inline void reuse(ZoneVector<T>* vec) {
      reusable_vectors_.emplace_back(std::move(*vec));
    }

   private:
    ZoneVector<ZoneVector<T>> reusable_vectors_;
  };

  template <typename T>
  class CachedVector final : public ZoneVector<T> {
   public:
157 158 159
    explicit CachedVector(CachedVectors<T>* cache)
        : ZoneVector<T>(cache->zone()), cache_(cache) {
      cache->fill(this);
160 161 162 163 164 165 166
    }
    ~CachedVector() { cache_->reuse(this); }

   private:
    CachedVectors<T>* cache_;
  };

167 168 169 170
  Zone* zone_;
  AsmJsScanner scanner_;
  WasmModuleBuilder* module_builder_;
  WasmFunctionBuilder* current_function_builder_;
171
  AsmType* return_type_ = nullptr;
172
  uintptr_t stack_limit_;
173
  StdlibSet stdlib_uses_;
174 175 176
  Vector<VarInfo> global_var_info_;
  Vector<VarInfo> local_var_info_;
  size_t num_globals_ = 0;
177

178 179 180 181 182
  CachedVectors<ValueType> cached_valuetype_vectors_{zone_};
  CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_};
  CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_};
  CachedVectors<int32_t> cached_int_vectors_{zone_};

183 184
  int function_temp_locals_offset_;
  int function_temp_locals_used_;
185
  int function_temp_locals_depth_;
186 187

  // Error Handling related
188
  bool failed_ = false;
189
  const char* failure_message_;
190
  int failure_location_ = kNoSourcePosition;
191 192

  // Module Related.
193 194 195
  AsmJsScanner::token_t stdlib_name_ = kTokenNone;
  AsmJsScanner::token_t foreign_name_ = kTokenNone;
  AsmJsScanner::token_t heap_name_ = kTokenNone;
196 197 198 199

  static const AsmJsScanner::token_t kTokenNone = 0;

  // Track if parsing a heap assignment.
200 201
  bool inside_heap_assignment_ = false;
  AsmType* heap_access_type_ = nullptr;
202 203 204 205 206 207 208 209 210 211 212 213 214 215

  ZoneVector<BlockInfo> block_stack_;

  // Types used for stdlib function and their set up.
  AsmType* stdlib_dq2d_;
  AsmType* stdlib_dqdq2d_;
  AsmType* stdlib_i2s_;
  AsmType* stdlib_ii2s_;
  AsmType* stdlib_minmax_;
  AsmType* stdlib_abs_;
  AsmType* stdlib_ceil_like_;
  AsmType* stdlib_fround_;

  // When making calls, the return type is needed to lookup signatures.
216
  // For `+callsite(..)` or `fround(callsite(..))` use this value to pass
217
  // along the coercion.
218
  AsmType* call_coercion_ = nullptr;
219

220 221 222
  // The source position associated with the above {call_coercion}.
  size_t call_coercion_position_;

223 224 225
  // When making calls, the coercion can also appear in the source stream
  // syntactically "behind" the call site. For `callsite(..)|0` use this
  // value to flag that such a coercion must happen.
226
  AsmType* call_coercion_deferred_ = nullptr;
227 228 229 230 231

  // The source position at which requesting a deferred coercion via the
  // aforementioned {call_coercion_deferred} is allowed.
  size_t call_coercion_deferred_position_;

232 233 234 235 236 237 238 239
  // The code position of the last heap access shift by an immediate value.
  // For `heap[expr >> value:NumericLiteral]` this indicates from where to
  // delete code when the expression is used as part of a valid heap access.
  // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched.
  size_t heap_access_shift_position_;
  uint32_t heap_access_shift_value_;
  static const size_t kNoHeapAccessShift = -1;

240 241
  // Used to track the last label we've seen so it can be matched to later
  // statements it's attached to.
242
  AsmJsScanner::token_t pending_label_ = kTokenNone;
243

244 245
  // Global imports. The list of imported variables that are copied during
  // module instantiation into a corresponding global variable.
246 247 248 249 250 251 252 253
  ZoneLinkedList<GlobalImport> global_imports_;

  Zone* zone() { return zone_; }

  inline bool Peek(AsmJsScanner::token_t token) {
    return scanner_.Token() == token;
  }

254 255 256 257
  inline bool PeekForZero() {
    return (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0);
  }

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
  inline bool Check(AsmJsScanner::token_t token) {
    if (scanner_.Token() == token) {
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  inline bool CheckForZero() {
    if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) {
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  inline bool CheckForDouble(double* value) {
    if (scanner_.IsDouble()) {
      *value = scanner_.AsDouble();
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

286
  inline bool CheckForUnsigned(uint32_t* value) {
287 288 289 290 291 292 293 294 295
    if (scanner_.IsUnsigned()) {
      *value = scanner_.AsUnsigned();
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

296
  inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) {
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
    if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) {
      *value = scanner_.AsUnsigned();
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  inline AsmJsScanner::token_t Consume() {
    AsmJsScanner::token_t ret = scanner_.Token();
    scanner_.Next();
    return ret;
  }

  void SkipSemicolon();

  VarInfo* GetVarInfo(AsmJsScanner::token_t token);
  uint32_t VarIndex(VarInfo* info);
  void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type,
317
                     ValueType vtype, WasmInitExpr init = WasmInitExpr());
318
  void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
319 320
  void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype,
                       bool mutable_variable, VarInfo* info);
321

322 323 324
  // Allocates a temporary local variable. The given {index} is absolute within
  // the function body, consider using {TemporaryVariableScope} when nesting.
  uint32_t TempVariable(int index);
325

326 327
  // Preserves a copy of the scanner's current identifier string in the zone.
  Vector<const char> CopyCurrentIdentifierString();
328 329 330

  // Use to set up block stack layers (including synthetic ones for if-else).
  // Begin/Loop/End below are implemented with these plus code generation.
331
  void BareBegin(BlockKind kind, AsmJsScanner::token_t label = 0);
332 333 334 335 336 337 338 339 340 341 342 343
  void BareEnd();
  int FindContinueLabelDepth(AsmJsScanner::token_t label);
  int FindBreakLabelDepth(AsmJsScanner::token_t label);

  // Use to set up actual wasm blocks/loops.
  void Begin(AsmJsScanner::token_t label = 0);
  void Loop(AsmJsScanner::token_t label = 0);
  void End();

  void InitializeStdlibTypes();

  FunctionSig* ConvertSignature(AsmType* return_type,
344
                                const ZoneVector<AsmType*>& params);
345

346 347 348
  void ValidateModule();            // 6.1 ValidateModule
  void ValidateModuleParameters();  // 6.1 ValidateModule - parameters
  void ValidateModuleVars();        // 6.1 ValidateModule - variables
349
  void ValidateModuleVar(bool mutable_variable);
350
  void ValidateModuleVarImport(VarInfo* info, bool mutable_variable);
351 352
  void ValidateModuleVarStdlib(VarInfo* info);
  void ValidateModuleVarNewStdlib(VarInfo* info);
353
  void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable);
354 355 356 357

  void ValidateExport();         // 6.2 ValidateExport
  void ValidateFunctionTable();  // 6.3 ValidateFunctionTable
  void ValidateFunction();       // 6.4 ValidateFunction
358
  void ValidateFunctionParams(ZoneVector<AsmType*>* params);
359
  void ValidateFunctionLocals(size_t param_count,
360
                              ZoneVector<ValueType>* locals);
361
  void ValidateStatement();              // 6.5 ValidateStatement
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
  void Block();                          // 6.5.1 Block
  void ExpressionStatement();            // 6.5.2 ExpressionStatement
  void EmptyStatement();                 // 6.5.3 EmptyStatement
  void IfStatement();                    // 6.5.4 IfStatement
  void ReturnStatement();                // 6.5.5 ReturnStatement
  bool IterationStatement();             // 6.5.6 IterationStatement
  void WhileStatement();                 // 6.5.6 IterationStatement - while
  void DoStatement();                    // 6.5.6 IterationStatement - do
  void ForStatement();                   // 6.5.6 IterationStatement - for
  void BreakStatement();                 // 6.5.7 BreakStatement
  void ContinueStatement();              // 6.5.8 ContinueStatement
  void LabelledStatement();              // 6.5.9 LabelledStatement
  void SwitchStatement();                // 6.5.10 SwitchStatement
  void ValidateCase();                   // 6.6. ValidateCase
  void ValidateDefault();                // 6.7 ValidateDefault
  AsmType* ValidateExpression();         // 6.8 ValidateExpression
  AsmType* Expression(AsmType* expect);  // 6.8.1 Expression
  AsmType* NumericLiteral();             // 6.8.2 NumericLiteral
  AsmType* Identifier();                 // 6.8.3 Identifier
  AsmType* CallExpression();             // 6.8.4 CallExpression
  AsmType* MemberExpression();           // 6.8.5 MemberExpression
  AsmType* AssignmentExpression();       // 6.8.6 AssignmentExpression
  AsmType* UnaryExpression();            // 6.8.7 UnaryExpression
385
  AsmType* MultiplicativeExpression();   // 6.8.8 MultiplicativeExpression
386 387 388 389 390 391 392 393 394 395 396 397 398 399
  AsmType* AdditiveExpression();         // 6.8.9 AdditiveExpression
  AsmType* ShiftExpression();            // 6.8.10 ShiftExpression
  AsmType* RelationalExpression();       // 6.8.11 RelationalExpression
  AsmType* EqualityExpression();         // 6.8.12 EqualityExpression
  AsmType* BitwiseANDExpression();       // 6.8.13 BitwiseANDExpression
  AsmType* BitwiseXORExpression();       // 6.8.14 BitwiseXORExpression
  AsmType* BitwiseORExpression();        // 6.8.15 BitwiseORExpression
  AsmType* ConditionalExpression();      // 6.8.16 ConditionalExpression
  AsmType* ParenthesizedExpression();    // 6.8.17 ParenthesiedExpression
  AsmType* ValidateCall();               // 6.9 ValidateCall
  bool PeekCall();                       // 6.9 ValidateCall - helper
  void ValidateHeapAccess();             // 6.10 ValidateHeapAccess
  void ValidateFloatCoercion();          // 6.11 ValidateFloatCoercion

400 401 402 403 404 405 406 407
  // Used as part of {ForStatement}. Scans forward to the next `)` in order to
  // skip over the third expression in a for-statement. This is one piece that
  // makes this parser not be a pure single-pass.
  void ScanToClosingParenthesis();

  // Used as part of {SwitchStatement}. Collects all case labels in the current
  // switch-statement, then resets the scanner position. This is one piece that
  // makes this parser not be a pure single-pass.
408
  void GatherCases(ZoneVector<int32_t>* cases);
409 410 411 412 413
};

}  // namespace wasm
}  // namespace internal
}  // namespace v8
414 415

#endif  // V8_ASMJS_ASM_PARSER_H_