Commit 4c3217e1 authored by bradnelson's avatar bradnelson Committed by Commit bot

[wasm][asm.js] Adding custom asm.js lexer.

Adding a custom lexer for asm.js parsing.
It takes advantage of a number of asm.js properties to simplify things:
* Assumes 'use asm' is the only string.
* Does not handle unicode for now (tools don't emit it).
* Combines global + local string table with lexer.

R=marja@chromium.org,vogelheim@chromium.org,kschimpf@chromium.org

BUG=v8:4203
BUG=v8:6090

Review-Url: https://codereview.chromium.org/2751693002
Cr-Commit-Position: refs/heads/master@{#43874}
parent 18c77ce5
......@@ -968,6 +968,9 @@ v8_source_set("v8_base") {
"src/arguments.h",
"src/asmjs/asm-js.cc",
"src/asmjs/asm-js.h",
"src/asmjs/asm-names.h",
"src/asmjs/asm-scanner.cc",
"src/asmjs/asm-scanner.h",
"src/asmjs/asm-typer.cc",
"src/asmjs/asm-typer.h",
"src/asmjs/asm-types.cc",
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_ASMJS_ASM_NAMES_H_
#define V8_ASMJS_ASM_NAMES_H_
// V(name)
// X-macro listing the names of the Math value members (E, PI, ...). The
// caller supplies V, which is expanded once per name.
#define STDLIB_MATH_VALUE_LIST(V) \
V(E) \
V(LN10) \
V(LN2) \
V(LOG2E) \
V(LOG10E) \
V(PI) \
V(SQRT1_2) \
V(SQRT2)
// V(stdlib.Math.<name>, Name, wasm-opcode, asm-js-type)
// X-macro listing the Math functions that come with a concrete wasm opcode.
// Each entry carries the lowercase stdlib name, its capitalized form, the
// wasm opcode and an asm.js type tag (dq2d, dqdq2d, ii2s or i2s).
#define STDLIB_MATH_FUNCTION_MONOMORPHIC_LIST(V) \
V(acos, Acos, kExprF64Acos, dq2d) \
V(asin, Asin, kExprF64Asin, dq2d) \
V(atan, Atan, kExprF64Atan, dq2d) \
V(cos, Cos, kExprF64Cos, dq2d) \
V(sin, Sin, kExprF64Sin, dq2d) \
V(tan, Tan, kExprF64Tan, dq2d) \
V(exp, Exp, kExprF64Exp, dq2d) \
V(log, Log, kExprF64Log, dq2d) \
V(atan2, Atan2, kExprF64Atan2, dqdq2d) \
V(pow, Pow, kExprF64Pow, dqdq2d) \
V(imul, Imul, kExprI32Mul, ii2s) \
V(clz32, Clz32, kExprI32Clz, i2s)
// V(stdlib.Math.<name>, Name, unused, asm-js-type)
// X-macro listing the Math functions that share the `ceil_like` type tag.
// The third argument is a placeholder (`x`) so that these entries have the
// same arity as the ones in STDLIB_MATH_FUNCTION_MONOMORPHIC_LIST.
#define STDLIB_MATH_FUNCTION_CEIL_LIKE_LIST(V) \
V(ceil, Ceil, x, ceil_like) \
V(floor, Floor, x, ceil_like) \
V(sqrt, Sqrt, x, ceil_like)
// V(stdlib.Math.<name>, Name, unused, asm-js-type)
// X-macro covering every Math function: the four entries declared directly
// here (min, max, abs, fround), followed by everything in
// STDLIB_MATH_FUNCTION_MONOMORPHIC_LIST and
// STDLIB_MATH_FUNCTION_CEIL_LIKE_LIST. For the monomorphic entries the third
// argument is a wasm opcode rather than the `x` placeholder.
#define STDLIB_MATH_FUNCTION_LIST(V) \
V(min, Min, x, minmax) \
V(max, Max, x, minmax) \
V(abs, Abs, x, abs) \
V(fround, Fround, x, fround) \
STDLIB_MATH_FUNCTION_MONOMORPHIC_LIST(V) \
STDLIB_MATH_FUNCTION_CEIL_LIKE_LIST(V)
// V(stdlib.<name>, wasm-load-type, wasm-store-type, wasm-type)
// X-macro listing the typed-array names together with the load type, store
// type and value type associated with each. The 8- and 16-bit arrays use
// distinct signed/unsigned load types (Mem8S/Mem8U, Mem16S/Mem16U) but a
// single store type; the 32- and 64-bit arrays use plain `Mem` for both.
#define STDLIB_ARRAY_TYPE_LIST(V) \
V(Int8Array, Mem8S, Mem8, I32) \
V(Uint8Array, Mem8U, Mem8, I32) \
V(Int16Array, Mem16S, Mem16, I32) \
V(Uint16Array, Mem16U, Mem16, I32) \
V(Int32Array, Mem, Mem, I32) \
V(Uint32Array, Mem, Mem, I32) \
V(Float32Array, Mem, Mem, F32) \
V(Float64Array, Mem, Mem, F64)
// V(name)
// X-macro listing the remaining stdlib names that get their own token:
// Infinity, NaN and Math itself.
#define STDLIB_OTHER_LIST(V) \
V(Infinity) \
V(NaN) \
V(Math)
// V(name)
// X-macro listing the keyword-like names that receive a dedicated token.
// NOTE(review): `return ` is spelled with a trailing space inside V(...);
// presumably this is what the "for return" clang-format guard protects --
// confirm before reformatting.
// clang-format off (for return)
#define KEYWORD_NAME_LIST(V) \
V(arguments) \
V(break) \
V(case) \
V(const) \
V(continue) \
V(default) \
V(do) \
V(else) \
V(eval) \
V(for) \
V(function) \
V(if) \
V(new) \
V(return ) \
V(switch) \
V(var) \
V(while)
// clang-format on
// V(token-string, token-name)
// X-macro listing the tokens whose spelling is longer than one character:
// the two-character comparisons, the shifts, and the 'use asm' directive
// (which is given a single token, UseAsm).
#define LONG_SYMBOL_NAME_LIST(V) \
V("<=", LE) \
V(">=", GE) \
V("==", EQ) \
V("!=", NE) \
V("<<", SHL) \
V(">>", SAR) \
V(">>>", SHR) \
V("'use asm'", UseAsm)
// V(character)
// X-macro listing the single characters enumerated as tokens here.
// Characters such as '/', '<', '>', '=' and '!' are not in this list.
// NOTE(review): presumably they are left out because they can begin a
// comment or a multi-character operator -- confirm against the scanner
// implementation.
// clang-format off
#define SIMPLE_SINGLE_TOKEN_LIST(V) \
V('+') V('-') V('*') V('%') V('~') V('^') V('&') V('|') V('(') V(')') \
V('[') V(']') V('{') V('}') V(':') V(';') V(',') V('?')
// clang-format on
// V(name, value, string-name)
// X-macro listing the tokens that do not correspond to a fixed source
// spelling. `name` is the enumerator, `value` its numeric token value and
// `string-name` a human-readable label for it.
#define SPECIAL_TOKEN_LIST(V) \
V(kUninitialized, 0, "{uninitialized}") \
V(kEndOfInput, -1, "{end of input}") \
V(kParseError, -2, "{parse error}") \
V(kUnsigned, -3, "{unsigned value}") \
V(kDouble, -4, "{double value}")
#endif
This diff is collapsed.
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_ASMJS_ASM_SCANNER_H_
#define V8_ASMJS_ASM_SCANNER_H_
#include <memory>
#include <string>
#include <unordered_map>
#include "src/asmjs/asm-names.h"
#include "src/base/logging.h"
#include "src/globals.h"
namespace v8 {
namespace internal {
class Utf16CharacterStream;
// A custom scanner to extract the token stream needed to parse valid
// asm.js: http://asmjs.org/spec/latest/
// This scanner intentionally avoids the portions of JavaScript lexing
// that are not required to determine if code is valid asm.js code.
// * Strings are disallowed except for 'use asm'.
// * Only the subset of keywords needed to check asm.js invariants is
// included.
// * Identifiers are accumulated into local + global string tables
// (for performance).
//
// Tokens are plain int32_t values (token_t). The enum further down documents
// how that value space is divided between local identifiers, builtin tokens,
// single-character tokens and global identifiers.
class V8_EXPORT_PRIVATE AsmJsScanner {
public:
typedef int32_t token_t;
AsmJsScanner();
// Pick the stream to parse (must be called before anything else).
// Takes ownership of |stream|.
void SetStream(std::unique_ptr<Utf16CharacterStream> stream);
// Get current token.
token_t Token() const { return token_; }
// Advance to the next token.
void Next();
// Back up by one token.
void Rewind();
// Get raw string for current identifier.
const std::string& GetIdentifierString() const {
// Identifier strings don't work after a rewind.
DCHECK(!rewind_);
return identifier_string_;
}
// Check if we just passed a newline.
bool IsPrecededByNewline() const {
// Newline tracking doesn't work if you back up.
DCHECK(!rewind_);
return preceded_by_newline_;
}
#if DEBUG
// Debug only method to go from a token back to its name.
// Slow, only use for debugging.
std::string Name(token_t token) const;
#endif
// Get current position (to use with Seek).
int GetPosition() const;
// Restores old position (token after that position).
void Seek(int pos);
// Select whether identifiers are resolved in global or local scope,
// and which scope new identifiers are added to.
void EnterLocalScope() { in_local_scope_ = true; }
void EnterGlobalScope() { in_local_scope_ = false; }
// Drop all current local identifiers.
void ResetLocals();
// Methods to check if a token is an identifier and which scope.
// The no-argument forms test the current token.
bool IsLocal() const { return IsLocal(Token()); }
bool IsGlobal() const { return IsGlobal(Token()); }
// Local identifiers are the tokens at or below kLocalsStart; global
// identifiers are the tokens at or above kGlobalsStart.
static bool IsLocal(token_t token) { return token <= kLocalsStart; }
static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
// Methods to find the index position of an identifier (count starting from
// 0 for each scope separately).
// Local tokens count downwards: kLocalsStart is index 0, kLocalsStart - 1 is
// index 1, and so on.
static size_t LocalIndex(token_t token) {
DCHECK(IsLocal(token));
return -(token - kLocalsStart);
}
// Global tokens count upwards: kGlobalsStart is index 0.
static size_t GlobalIndex(token_t token) {
DCHECK(IsGlobal(token));
return token - kGlobalsStart;
}
// Methods to check if the current token is an asm.js "number" (contains a
// dot) or an "unsigned" (a number without a dot).
// The As*() accessors return the stored value without checking the current
// token kind; callers are expected to test Is*() first.
bool IsUnsigned() const { return Token() == kUnsigned; }
uint64_t AsUnsigned() const { return unsigned_value_; }
bool IsDouble() const { return Token() == kDouble; }
double AsDouble() const { return double_value_; }
// clang-format off
enum {
// (-10000-kMaxIdentifierCount .. -10000] :: Local identifiers (index 0 is
//                                           -10000, counting downwards)
// (-10000 .. -1]                         :: Builtin tokens like keywords
//                                           (also includes some special
//                                           ones like end of input)
// 0 .. 255                               :: Single char tokens
//                                           (kUninitialized is 0)
// 256 .. 256+kMaxIdentifierCount         :: Global identifiers
kLocalsStart = -10000,
// The builtin tokens below rely on implicit enumerator numbering, so they
// take consecutive values starting at kLocalsStart + 1.
#define V(name, _junk1, _junk2, _junk3) kToken_##name,
STDLIB_MATH_FUNCTION_LIST(V)
STDLIB_ARRAY_TYPE_LIST(V)
#undef V
#define V(name) kToken_##name,
STDLIB_OTHER_LIST(V)
STDLIB_MATH_VALUE_LIST(V)
KEYWORD_NAME_LIST(V)
#undef V
#define V(rawname, name) kToken_##name,
LONG_SYMBOL_NAME_LIST(V)
#undef V
// Special tokens carry the explicit values given in SPECIAL_TOKEN_LIST.
#define V(name, value, string_name) name = value,
SPECIAL_TOKEN_LIST(V)
#undef V
kGlobalsStart = 256,
};
// clang-format on
private:
// Character source installed by SetStream().
std::unique_ptr<Utf16CharacterStream> stream_;
// Value returned by Token().
token_t token_;
token_t preceding_token_;
token_t next_token_;
// GetIdentifierString() and IsPrecededByNewline() DCHECK that this is false.
bool rewind_;
// Value returned by GetIdentifierString().
std::string identifier_string_;
// Toggled by EnterLocalScope() / EnterGlobalScope().
bool in_local_scope_;
std::unordered_map<std::string, token_t> local_names_;
std::unordered_map<std::string, token_t> global_names_;
std::unordered_map<std::string, token_t> property_names_;
int global_count_;
// Values returned by AsDouble() / AsUnsigned().
double double_value_;
uint64_t unsigned_value_;
// Value returned by IsPrecededByNewline().
bool preceded_by_newline_;
// Consume multiple characters.
void ConsumeIdentifier(uc32 ch);
void ConsumeNumber(uc32 ch);
bool ConsumeCComment();
void ConsumeCPPComment();
void ConsumeString(uc32 quote);
void ConsumeCompareOrShift(uc32 ch);
// Classify character categories.
bool IsIdentifierStart(uc32 ch);
bool IsIdentifierPart(uc32 ch);
bool IsNumberStart(uc32 ch);
};
} // namespace internal
} // namespace v8
#endif
......@@ -551,6 +551,8 @@ DEFINE_BOOL(validate_asm, false, "validate asm.js modules before compiling")
DEFINE_BOOL(suppress_asm_messages, false,
"don't emit asm.js related messages (for golden file testing)")
DEFINE_BOOL(trace_asm_time, false, "log asm.js timing info to the console")
DEFINE_BOOL(trace_asm_scanner, false,
"log tokens encountered by asm.js scanner")
DEFINE_BOOL(dump_wasm_module, false, "dump WASM module bytes")
DEFINE_STRING(dump_wasm_module_path, NULL, "directory to dump wasm modules to")
......
......@@ -6,6 +6,7 @@
#define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
#include "include/v8.h" // for v8::ScriptCompiler
#include "src/globals.h"
namespace v8 {
namespace internal {
......@@ -16,7 +17,7 @@ class Utf16CharacterStream;
class RuntimeCallStats;
class String;
class ScannerStream {
class V8_EXPORT_PRIVATE ScannerStream {
public:
static Utf16CharacterStream* For(Handle<String> data);
static Utf16CharacterStream* For(Handle<String> data, int start_pos,
......
......@@ -1165,6 +1165,18 @@ Token::Value Scanner::ScanTemplateContinuation() {
return ScanTemplateSpan();
}
// Returns the internalized contents of source_url_, or a default-constructed
// handle when source_url_ is empty.
Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
  if (source_url_.length() <= 0) return Handle<String>();
  return source_url_.Internalize(isolate);
}
// Returns the internalized contents of source_mapping_url_, or a
// default-constructed handle when source_mapping_url_ is empty.
Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
  if (source_mapping_url_.length() <= 0) return Handle<String>();
  return source_mapping_url_.Internalize(isolate);
}
void Scanner::ScanDecimalDigits() {
while (IsDecimalDigit(c0_))
......
......@@ -330,18 +330,8 @@ class Scanner {
Token::Value ScanTemplateStart();
Token::Value ScanTemplateContinuation();
// Returns the internalized contents of source_url_, or a default-constructed
// handle when source_url_ is empty.
Handle<String> SourceUrl(Isolate* isolate) const {
  const bool has_url = source_url_.length() > 0;
  return has_url ? source_url_.Internalize(isolate) : Handle<String>();
}
// Returns the internalized contents of source_mapping_url_, or a
// default-constructed handle when source_mapping_url_ is empty.
Handle<String> SourceMappingUrl(Isolate* isolate) const {
  const bool has_url = source_mapping_url_.length() > 0;
  return has_url ? source_mapping_url_.Internalize(isolate) : Handle<String>();
}
Handle<String> SourceUrl(Isolate* isolate) const;
Handle<String> SourceMappingUrl(Isolate* isolate) const;
bool FoundHtmlComment() const { return found_html_comment_; }
......
......@@ -417,6 +417,9 @@
'arguments.h',
'asmjs/asm-js.cc',
'asmjs/asm-js.h',
'asmjs/asm-names.h',
'asmjs/asm-scanner.cc',
'asmjs/asm-scanner.h',
'asmjs/asm-typer.cc',
'asmjs/asm-typer.h',
'asmjs/asm-types.cc',
......
......@@ -15,6 +15,7 @@ v8_executable("unittests") {
"api/isolate-unittest.cc",
"api/remote-object-unittest.cc",
"api/v8-object-unittest.cc",
"asmjs/asm-scanner-unittest.cc",
"base/atomic-utils-unittest.cc",
"base/bits-unittest.cc",
"base/cpu-unittest.cc",
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/asmjs/asm-scanner.h"
#include "src/objects.h"
#include "src/parsing/scanner-character-streams.h"
#include "src/parsing/scanner.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace v8 {
namespace internal {
// Shorthand for the AsmJsScanner::kToken_<t> enumerator of a named token.
#define TOK(t) AsmJsScanner::kToken_##t
class AsmJsScannerTest : public ::testing::Test {
protected:
void SetupSource(const char* source) {
scanner.SetStream(ScannerStream::ForTesting(source));
}
void Skip(AsmJsScanner::token_t t) {
CHECK_EQ(t, scanner.Token());
scanner.Next();
}
void SkipGlobal() {
CHECK(scanner.IsGlobal());
scanner.Next();
}
void SkipLocal() {
CHECK(scanner.IsLocal());
scanner.Next();
}
void CheckForEnd() { CHECK(scanner.Token() == AsmJsScanner::kEndOfInput); }
void CheckForParseError() {
CHECK(scanner.Token() == AsmJsScanner::kParseError);
}
AsmJsScanner scanner;
};
// Scans a minimal function declaration and checks every token, including the
// raw identifier string of the function name.
TEST_F(AsmJsScannerTest, SimpleFunction) {
  SetupSource("function foo() { return; }");
  Skip(TOK(function));
  // CHECK_EQ rather than DCHECK_EQ: DCHECKs are compiled out of release
  // builds, which would silently drop this assertion from the test.
  CHECK_EQ("foo", scanner.GetIdentifierString());
  SkipGlobal();
  Skip('(');
  Skip(')');
  Skip('{');
  // clang-format off
  Skip(TOK(return));
  // clang-format on
  Skip(';');
  Skip('}');
  CheckForEnd();
}
// Each keyword in the source must scan to its dedicated token, in order.
TEST_F(AsmJsScannerTest, JSKeywords) {
  SetupSource(
      "arguments break case const continue\n"
      "default do else eval for function\n"
      "if new return switch var while\n");
  // clang-format off
  const AsmJsScanner::token_t expected_tokens[] = {
      TOK(arguments), TOK(break),    TOK(case),
      TOK(const),     TOK(continue), TOK(default),
      TOK(do),        TOK(else),     TOK(eval),
      TOK(for),       TOK(function), TOK(if),
      TOK(new),       TOK(return),   TOK(switch),
      TOK(var),       TOK(while),
  };
  // clang-format on
  for (AsmJsScanner::token_t expected : expected_tokens) {
    Skip(expected);
  }
  CheckForEnd();
}
// Operators separated by whitespace must each scan to their own token.
TEST_F(AsmJsScannerTest, JSOperatorsSpread) {
  SetupSource(
      "+ - * / % & | ^ ~ << >> >>>\n"
      "< > <= >= == !=\n");
  const AsmJsScanner::token_t expected_tokens[] = {
      '+',      '-',      '*',      '/',     '%',     '&',
      '|',      '^',      '~',      TOK(SHL), TOK(SAR), TOK(SHR),
      '<',      '>',      TOK(LE),  TOK(GE), TOK(EQ), TOK(NE),
  };
  for (AsmJsScanner::token_t expected : expected_tokens) {
    Skip(expected);
  }
  CheckForEnd();
}
// The same operator sequence as JSOperatorsSpread, but with almost all of the
// separating whitespace removed.
TEST_F(AsmJsScannerTest, JSOperatorsTight) {
  SetupSource(
      "+-*/%&|^~<<>> >>>\n"
      "<><=>= ==!=\n");
  const AsmJsScanner::token_t expected_tokens[] = {
      '+',      '-',      '*',      '/',     '%',     '&',
      '|',      '^',      '~',      TOK(SHL), TOK(SAR), TOK(SHR),
      '<',      '>',      TOK(LE),  TOK(GE), TOK(EQ), TOK(NE),
  };
  for (AsmJsScanner::token_t expected : expected_tokens) {
    Skip(expected);
  }
  CheckForEnd();
}
// Both the single-quoted and the double-quoted directive must scan to the
// UseAsm token.
TEST_F(AsmJsScannerTest, UsesOfAsm) {
  SetupSource("'use asm' \"use asm\"\n");
  const int kDirectiveCount = 2;
  for (int i = 0; i < kDirectiveCount; ++i) {
    Skip(TOK(UseAsm));
  }
  CheckForEnd();
}
// Without selecting a scope first, an identifier is expected to be global and
// to scan to the same token on every occurrence.
TEST_F(AsmJsScannerTest, DefaultGlobalScope) {
  SetupSource("var x = x + x;");
  Skip(TOK(var));
  CHECK_EQ("x", scanner.GetIdentifierString());
  const AsmJsScanner::token_t x_token = scanner.Token();
  SkipGlobal();
  const AsmJsScanner::token_t remaining[] = {'=', x_token, '+', x_token, ';'};
  for (AsmJsScanner::token_t expected : remaining) {
    Skip(expected);
  }
  CheckForEnd();
}
// After EnterGlobalScope(), an identifier is expected to be global and to
// scan to the same token on every occurrence.
TEST_F(AsmJsScannerTest, GlobalScope) {
  SetupSource("var x = x + x;");
  scanner.EnterGlobalScope();
  Skip(TOK(var));
  CHECK_EQ("x", scanner.GetIdentifierString());
  const AsmJsScanner::token_t x_token = scanner.Token();
  SkipGlobal();
  const AsmJsScanner::token_t remaining[] = {'=', x_token, '+', x_token, ';'};
  for (AsmJsScanner::token_t expected : remaining) {
    Skip(expected);
  }
  CheckForEnd();
}
// After EnterLocalScope(), an identifier is expected to be local and to scan
// to the same token on every occurrence.
TEST_F(AsmJsScannerTest, LocalScope) {
  SetupSource("var x = x + x;");
  scanner.EnterLocalScope();
  Skip(TOK(var));
  CHECK_EQ("x", scanner.GetIdentifierString());
  const AsmJsScanner::token_t x_token = scanner.Token();
  SkipLocal();
  const AsmJsScanner::token_t remaining[] = {'=', x_token, '+', x_token, ';'};
  for (AsmJsScanner::token_t expected : remaining) {
    Skip(expected);
  }
  CheckForEnd();
}
// Literals without a dot must scan as unsigned values (decimal and hex);
// literals with a dot must scan as doubles.
TEST_F(AsmJsScannerTest, Numbers) {
  SetupSource("1 1.2 0x1f 1.e3");

  // Checks the current token is an unsigned with |value|, then advances.
  auto expect_unsigned = [this](uint64_t value) {
    CHECK(scanner.IsUnsigned());
    CHECK_EQ(value, scanner.AsUnsigned());
    scanner.Next();
  };
  // Checks the current token is a double with |value|, then advances.
  auto expect_double = [this](double value) {
    CHECK(scanner.IsDouble());
    CHECK_EQ(value, scanner.AsDouble());
    scanner.Next();
  };

  expect_unsigned(1);
  expect_double(1.2);
  expect_unsigned(31);
  expect_double(1.0e3);
  CheckForEnd();
}
// The leading '.' is expected as a token of its own; what follows it must be
// reported as a parse error.
TEST_F(AsmJsScannerTest, BadNumber) {
  const char* const malformed_source = ".123fe";
  SetupSource(malformed_source);
  Skip('.');
  CheckForParseError();
}
// After stepping past a token, Rewind() must make that same token current
// again, at every position in the stream.
TEST_F(AsmJsScannerTest, Rewind1) {
  SetupSource("+ - * /");
  const AsmJsScanner::token_t operators[] = {'+', '-', '*', '/'};
  for (AsmJsScanner::token_t op : operators) {
    Skip(op);
    scanner.Rewind();
    Skip(op);
  }
  CheckForEnd();
}
// Line comments (through end of line) and block comments must not produce
// tokens; only the code around them does.
TEST_F(AsmJsScannerTest, Comments) {
  SetupSource(
      "var // This is a test /* */ eval\n"
      "var /* test *** test */ eval\n"
      "function /* this */ ^");
  const AsmJsScanner::token_t expected_tokens[] = {
      TOK(var), TOK(var), TOK(eval), TOK(function), '^',
  };
  for (AsmJsScanner::token_t expected : expected_tokens) {
    Skip(expected);
  }
  CheckForEnd();
}
// A block comment that is never closed must be reported as a parse error.
TEST_F(AsmJsScannerTest, TrailingCComment) {
  const char* const unterminated_source = "var /* test\n";
  SetupSource(unterminated_source);
  Skip(TOK(var));
  CheckForParseError();
}
// Records a position, scans ahead (with rewinds mixed in), then seeks back
// and checks that scanning resumes with the token after that position.
TEST_F(AsmJsScannerTest, Seeking) {
  SetupSource("var eval do arguments function break\n");
  Skip(TOK(var));
  const int saved_position = scanner.GetPosition();

  // Scan forward past the saved position, rewinding along the way.
  Skip(TOK(eval));
  Skip(TOK(do));
  Skip(TOK(arguments));
  scanner.Rewind();
  Skip(TOK(arguments));
  scanner.Rewind();

  // Jump back and scan the rest of the input.
  scanner.Seek(saved_position);
  const AsmJsScanner::token_t after_seek[] = {
      TOK(do), TOK(arguments), TOK(function), TOK(break),
  };
  for (AsmJsScanner::token_t expected : after_seek) {
    Skip(expected);
  }
  CheckForEnd();
}
// The token that follows each line break must report that it was preceded by
// a newline.
TEST_F(AsmJsScannerTest, Newlines) {
  SetupSource(
      "var x = 1\n"
      "var y = 2\n");
  const int kStatementCount = 2;
  for (int statement = 0; statement < kStatementCount; ++statement) {
    Skip(TOK(var));
    scanner.Next();  // The identifier.
    Skip('=');
    scanner.Next();  // The number.
    CHECK(scanner.IsPrecededByNewline());
  }
  CheckForEnd();
}
} // namespace internal
} // namespace v8
......@@ -13,6 +13,7 @@
'api/isolate-unittest.cc',
'api/remote-object-unittest.cc',
'api/v8-object-unittest.cc',
'asmjs/asm-scanner-unittest.cc',
'base/atomic-utils-unittest.cc',
'base/bits-unittest.cc',
'base/cpu-unittest.cc',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment