Commit 378375d2 authored by Toon Verwaest, committed by Commit Bot

[scanner] Templatize scan functions by encoding

This way we can avoid reencoding everything to utf16 (buffered) and avoid the
overhead of needing to check the encoding for each character individually.

This may result in a minor asm.js scanning regression due to one-byte tokens
possibly being more common.

Change-Id: I90b51c256d56d4f4fa2d235d7e1e58fc01e43f31
Reviewed-on: https://chromium-review.googlesource.com/1172437
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55217}
parent 28300e33
......@@ -20,7 +20,11 @@ namespace {
static const int kMaxIdentifierCount = 0xF000000;
};
AsmJsScanner::AsmJsScanner(CharacterStream<uint16_t>* stream, int start)
// Invokes the member function template `Call` with the given arguments,
// instantiated for uint16_t when stream_->is_two_byte() is true and for
// uint8_t otherwise.
#define SPECIALIZE(Call, ...) \
(stream_->is_two_byte() ? Call<uint16_t>(__VA_ARGS__) \
: Call<uint8_t>(__VA_ARGS__))
AsmJsScanner::AsmJsScanner(ScannerStream* stream, int start)
: stream_(stream),
token_(kUninitialized),
preceding_token_(kUninitialized),
......@@ -34,7 +38,7 @@ AsmJsScanner::AsmJsScanner(CharacterStream<uint16_t>* stream, int start)
double_value_(0.0),
unsigned_value_(0),
preceded_by_newline_(false) {
stream->Seek(start);
SPECIALIZE(DoSeek, start);
#define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
STDLIB_MATH_FUNCTION_LIST(V)
STDLIB_ARRAY_TYPE_LIST(V)
......@@ -51,7 +55,10 @@ AsmJsScanner::AsmJsScanner(CharacterStream<uint16_t>* stream, int start)
Next();
}
void AsmJsScanner::Next() {
void AsmJsScanner::Next() { SPECIALIZE(Scan); }
template <typename Char>
void AsmJsScanner::Scan() {
if (rewind_) {
preceding_token_ = token_;
preceding_position_ = position_;
......@@ -85,8 +92,8 @@ void AsmJsScanner::Next() {
preceding_position_ = position_;
for (;;) {
position_ = stream_->pos();
uc32 ch = stream_->Advance();
position_ = Source<Char>()->pos();
uc32 ch = Advance<Char>();
switch (ch) {
case ' ':
case '\t':
......@@ -106,20 +113,20 @@ void AsmJsScanner::Next() {
case '\'':
case '"':
ConsumeString(ch);
ConsumeString<Char>(ch);
return;
case '/':
ch = stream_->Advance();
ch = Advance<Char>();
if (ch == '/') {
ConsumeCPPComment();
ConsumeCPPComment<Char>();
} else if (ch == '*') {
if (!ConsumeCComment()) {
if (!ConsumeCComment<Char>()) {
token_ = kParseError;
return;
}
} else {
stream_->Back();
Back<Char>();
token_ = '/';
return;
}
......@@ -131,7 +138,7 @@ void AsmJsScanner::Next() {
case '>':
case '=':
case '!':
ConsumeCompareOrShift(ch);
ConsumeCompareOrShift<Char>(ch);
return;
#define V(single_char_token) case single_char_token:
......@@ -143,9 +150,9 @@ void AsmJsScanner::Next() {
default:
if (IsIdentifierStart(ch)) {
ConsumeIdentifier(ch);
ConsumeIdentifier<Char>(ch);
} else if (IsNumberStart(ch)) {
ConsumeNumber(ch);
ConsumeNumber<Char>(ch);
} else {
// TODO(bradnelson): Support unicode (probably via UnicodeCache).
token_ = kParseError;
......@@ -213,7 +220,7 @@ std::string AsmJsScanner::Name(token_t token) const {
#endif
void AsmJsScanner::Seek(size_t pos) {
stream_->Seek(pos);
SPECIALIZE(DoSeek, pos);
preceding_token_ = kUninitialized;
token_ = kUninitialized;
next_token_ = kUninitialized;
......@@ -224,15 +231,16 @@ void AsmJsScanner::Seek(size_t pos) {
Next();
}
template <typename Char>
void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
// Consume characters while still part of the identifier.
identifier_string_.clear();
while (IsIdentifierPart(ch)) {
identifier_string_ += ch;
ch = stream_->Advance();
ch = Advance<Char>();
}
// Go back one for next time.
stream_->Back();
Back<Char>();
// Decode what the identifier means.
if (preceding_token_ == '.') {
......@@ -272,13 +280,14 @@ void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
}
}
template <typename Char>
void AsmJsScanner::ConsumeNumber(uc32 ch) {
std::string number;
number = ch;
bool has_dot = ch == '.';
bool has_prefix = false;
for (;;) {
ch = stream_->Advance();
ch = Advance<Char>();
if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
(ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' ||
ch == 'x' ||
......@@ -297,7 +306,7 @@ void AsmJsScanner::ConsumeNumber(uc32 ch) {
break;
}
}
stream_->Back();
Back<Char>();
// Special case the most common number.
if (number.size() == 1 && number[0] == '0') {
unsigned_value_ = 0;
......@@ -324,7 +333,7 @@ void AsmJsScanner::ConsumeNumber(uc32 ch) {
// problem.
if (number[0] == '.') {
for (size_t k = 1; k < number.size(); ++k) {
stream_->Back();
Back<Char>();
}
token_ = '.';
return;
......@@ -346,11 +355,12 @@ void AsmJsScanner::ConsumeNumber(uc32 ch) {
}
}
template <typename Char>
bool AsmJsScanner::ConsumeCComment() {
for (;;) {
uc32 ch = stream_->Advance();
uc32 ch = Advance<Char>();
while (ch == '*') {
ch = stream_->Advance();
ch = Advance<Char>();
if (ch == '/') {
return true;
}
......@@ -361,33 +371,36 @@ bool AsmJsScanner::ConsumeCComment() {
}
}
// Consumes the rest of a '//' comment: keeps advancing through the stream
// until a '\n' or kEndOfInput has been read (that terminating character is
// consumed as well).
template <typename Char>
void AsmJsScanner::ConsumeCPPComment() {
for (;;) {
uc32 ch = stream_->Advance();
uc32 ch = Advance<Char>();
if (ch == '\n' || ch == kEndOfInput) {
return;
}
}
}
// Scans the body of a string literal. `quote` is the quote character that
// opened the literal; the same character must close it.
// Sets token_ to kToken_UseAsm when the stream contains exactly "use asm"
// followed by `quote`, and to kParseError on the first mismatching character.
template <typename Char>
void AsmJsScanner::ConsumeString(uc32 quote) {
// Only string allowed is 'use asm' / "use asm".
const char* expected = "use asm";
// Compare the stream against the expected text one character at a time.
for (; *expected != '\0'; ++expected) {
if (stream_->Advance() != *expected) {
if (Advance<Char>() != *expected) {
token_ = kParseError;
return;
}
}
// The literal must be terminated by the same quote that opened it.
if (stream_->Advance() != quote) {
if (Advance<Char>() != quote) {
token_ = kParseError;
return;
}
token_ = kToken_UseAsm;
}
template <typename Char>
void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
uc32 next_ch = stream_->Advance();
uc32 next_ch = Advance<Char>();
if (next_ch == '=') {
switch (ch) {
case '<':
......@@ -408,14 +421,14 @@ void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
} else if (ch == '<' && next_ch == '<') {
token_ = kToken_SHL;
} else if (ch == '>' && next_ch == '>') {
if (stream_->Advance() == '>') {
if (Advance<Char>() == '>') {
token_ = kToken_SHR;
} else {
token_ = kToken_SAR;
stream_->Back();
Back<Char>();
}
} else {
stream_->Back();
Back<Char>();
token_ = ch;
}
}
......@@ -430,5 +443,7 @@ bool AsmJsScanner::IsNumberStart(uc32 ch) {
return ch == '.' || IsDecimalDigit(ch);
}
#undef SPECIALIZE
} // namespace internal
} // namespace v8
......@@ -16,6 +16,7 @@
namespace v8 {
namespace internal {
class ScannerStream;
template <typename Char>
class CharacterStream;
......@@ -32,7 +33,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
public:
typedef int32_t token_t;
AsmJsScanner(CharacterStream<uint16_t>* stream, int start);
AsmJsScanner(ScannerStream* stream, int start);
// Get current token.
token_t Token() const { return token_; }
......@@ -137,7 +138,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
// clang-format on
private:
CharacterStream<uint16_t>* stream_;
ScannerStream* const stream_;
token_t token_;
token_t preceding_token_;
token_t next_token_; // Only set when in {rewind} state.
......@@ -155,12 +156,37 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
uint32_t unsigned_value_;
bool preceded_by_newline_;
template <typename Char>
void Scan();
// Returns stream_ downcast to the CharacterStream specialization for Char.
// NOTE(review): the static_cast is unchecked, so Char presumably has to match
// the stream's actual encoding (see ScannerStream::is_two_byte()) - confirm.
template <typename Char>
inline CharacterStream<Char>* Source() {
return static_cast<CharacterStream<Char>*>(stream_);
}
// Forwards to CharacterStream<Char>::Advance() on stream_ and returns its
// result.
template <typename Char>
inline uc32 Advance() {
return Source<Char>()->Advance();
}
// Forwards to CharacterStream<Char>::Back() on stream_.
template <typename Char>
inline void Back() {
return Source<Char>()->Back();
}
// Forwards to CharacterStream<Char>::Seek(pos) on stream_.
template <typename Char>
void DoSeek(size_t pos) {
Source<Char>()->Seek(pos);
}
// Consume multiple characters.
template <typename Char>
void ConsumeIdentifier(uc32 ch);
template <typename Char>
void ConsumeNumber(uc32 ch);
template <typename Char>
bool ConsumeCComment();
template <typename Char>
void ConsumeCPPComment();
template <typename Char>
void ConsumeString(uc32 quote);
template <typename Char>
void ConsumeCompareOrShift(uc32 ch);
// Classify character categories.
......
......@@ -412,7 +412,8 @@ Parser::Parser(ParseInfo* info)
info->runtime_call_stats(), info->logger(),
info->script().is_null() ? -1 : info->script()->id(),
info->is_module(), true),
scanner_(info->unicode_cache()),
scanner_(info->unicode_cache(), info->character_stream(),
info->is_module()),
reusable_preparser_(nullptr),
mode_(PARSE_EAGERLY), // Lazy mode must be set explicitly.
source_range_map_(info->source_range_map()),
......@@ -507,9 +508,8 @@ FunctionLiteral* Parser::ParseProgram(Isolate* isolate, ParseInfo* info) {
// Initialize parser state.
DeserializeScopeChain(isolate, info, info->maybe_outer_scope_info());
auto stream =
static_cast<CharacterStream<uint16_t>*>(info->character_stream());
scanner_.Initialize(stream, info->is_module());
scanner_.Initialize();
FunctionLiteral* result = DoParseProgram(isolate, info);
MaybeResetCharacterStream(info, result);
......@@ -703,9 +703,8 @@ FunctionLiteral* Parser::ParseFunction(Isolate* isolate, ParseInfo* info,
// Initialize parser state.
Handle<String> name(shared_info->Name(), isolate);
info->set_function_name(ast_value_factory()->GetString(name));
auto stream =
static_cast<CharacterStream<uint16_t>*>(info->character_stream());
scanner_.Initialize(stream, info->is_module());
scanner_.Initialize();
FunctionLiteral* result =
DoParseFunction(isolate, info, info->function_name());
......@@ -3451,9 +3450,8 @@ void Parser::ParseOnBackground(ParseInfo* info) {
DCHECK_NULL(info->literal());
FunctionLiteral* result = nullptr;
auto stream =
static_cast<CharacterStream<uint16_t>*>(info->character_stream());
scanner_.Initialize(stream, info->is_module());
scanner_.Initialize();
DCHECK(info->maybe_outer_scope_info().is_null());
DCHECK(original_scope_);
......
......@@ -250,82 +250,45 @@ class Utf8ChunkedStream : public ChunkedStream<uint16_t> {
bool seen_bom_ = false;
};
// Provides a buffered utf-16 view on the bytes from the underlying ByteStream.
// Chars are buffered if either the underlying stream isn't utf-16 or the
// underlying utf-16 stream might move (is on-heap).
template <template <typename T> class ByteStream>
class BufferedCharacterStream : public CharacterStream<uint16_t> {
public:
// Forwards `args` to the wrapped ByteStream<uint8_t> and records `pos` as the
// initial buffer position.
template <class... TArgs>
BufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
buffer_pos_ = pos;
}
protected:
// Refills buffer_ with at most kBufferSize characters taken from the byte
// stream at the current position and resets the cursor to the buffer start.
// Returns false, leaving an empty buffer, when the byte stream yields no data
// at that position.
bool ReadBlock() final {
size_t position = pos();
buffer_pos_ = position;
buffer_start_ = &buffer_[0];
buffer_cursor_ = buffer_start_;
Range<uint8_t> range = byte_stream_.GetDataAt(position);
if (range.length() == 0) {
buffer_end_ = buffer_start_;
return false;
}
// Never copy more than the local buffer can hold.
size_t length = Min(kBufferSize, range.length());
i::CopyCharsUnsigned(buffer_, range.start, length);
buffer_end_ = &buffer_[length];
return true;
}
// Reports the wrapped ByteStream's kCanAccessHeap constant.
bool can_access_heap() final { return ByteStream<uint8_t>::kCanAccessHeap; }
private:
// Capacity of buffer_, in uc16 elements.
static const size_t kBufferSize = 512;
uc16 buffer_[kBufferSize];
ByteStream<uint8_t> byte_stream_;
};
// Provides an unbuffered view on the data from the underlying ByteStream:
// ReadBlock() points the stream's buffer pointers directly at the range
// returned by the ByteStream instead of copying the data.
template <template <typename T> class ByteStream>
class UnbufferedCharacterStream : public CharacterStream<uint16_t> {
template <typename Char, template <typename T> class ByteStream>
class UnbufferedCharacterStream : public CharacterStream<Char> {
public:
// Forwards `args` to the wrapped ByteStream and records `pos` as the initial
// buffer position. In the Char-templated form the inherited members are
// reached through this-> because they live in a dependent base class.
template <class... TArgs>
UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
buffer_pos_ = pos;
this->buffer_pos_ = pos;
}
protected:
// Re-points buffer_start_/buffer_end_/buffer_cursor_ at the data the byte
// stream provides for the current position. Returns false when that range is
// empty.
bool ReadBlock() final {
size_t position = pos();
buffer_pos_ = position;
Range<uint16_t> range = byte_stream_.GetDataAt(position);
buffer_start_ = range.start;
buffer_end_ = range.end;
buffer_cursor_ = buffer_start_;
size_t position = this->pos();
this->buffer_pos_ = position;
Range<Char> range = this->byte_stream_.GetDataAt(position);
this->buffer_start_ = range.start;
this->buffer_end_ = range.end;
this->buffer_cursor_ = range.start;
if (range.length() == 0) return false;
DCHECK(!range.unaligned_start());
DCHECK_LE(buffer_start_, buffer_end_);
DCHECK_LE(this->buffer_start_, this->buffer_end_);
return true;
}
// Reports the wrapped ByteStream's kCanAccessHeap constant.
bool can_access_heap() final { return ByteStream<uint16_t>::kCanAccessHeap; }
bool can_access_heap() final { return ByteStream<Char>::kCanAccessHeap; }
ByteStream<uint16_t> byte_stream_;
ByteStream<Char> byte_stream_;
};
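// Provides an unbuffered view on the characters of an on-heap string
// (OnHeapStream) and re-points its buffer pointers from a GC epilogue callback
// in case the underlying data has moved.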
template <typename Char>
class RelocatingCharacterStream
: public UnbufferedCharacterStream<OnHeapStream> {
: public UnbufferedCharacterStream<Char, OnHeapStream> {
public:
template <class... TArgs>
RelocatingCharacterStream(Isolate* isolate, size_t pos, TArgs... args)
: UnbufferedCharacterStream<OnHeapStream>(pos, args...),
: UnbufferedCharacterStream<Char, OnHeapStream>(pos, args...),
isolate_(isolate) {
isolate->heap()->AddGCEpilogueCallback(UpdateBufferPointersCallback,
v8::kGCTypeAll, this);
......@@ -341,16 +304,17 @@ class RelocatingCharacterStream
v8::GCType type,
v8::GCCallbackFlags flags,
void* stream) {
reinterpret_cast<RelocatingCharacterStream*>(stream)
reinterpret_cast<RelocatingCharacterStream<Char>*>(stream)
->UpdateBufferPointers();
}
void UpdateBufferPointers() {
Range<uint16_t> range = byte_stream_.GetDataAt(0);
if (range.start != buffer_start_) {
buffer_cursor_ = (buffer_cursor_ - buffer_start_) + range.start;
buffer_start_ = range.start;
buffer_end_ = range.end;
Range<Char> range = this->byte_stream_.GetDataAt(0);
if (range.start != this->buffer_start_) {
this->buffer_cursor_ =
(this->buffer_cursor_ - this->buffer_start_) + range.start;
this->buffer_start_ = range.start;
this->buffer_end_ = range.end;
}
}
......@@ -360,6 +324,21 @@ class RelocatingCharacterStream
// ----------------------------------------------------------------------------
// ScannerStream: Create stream instances.
// Forwards a ScannerStream call to the CharacterStream specialization selected
// by is_two_byte_: CharacterStream<uint16_t> when it is set,
// CharacterStream<uint8_t> otherwise.
#define SPECIALIZE(Call, ...) \
(is_two_byte_ \
? static_cast<CharacterStream<uint16_t>*>(this)->Call(__VA_ARGS__) \
: static_cast<CharacterStream<uint8_t>*>(this)->Call(__VA_ARGS__))
// Entry points on ScannerStream that dispatch on the stream's encoding and
// call the same-named member of the matching CharacterStream specialization.
uc32 ScannerStream::Advance() { return SPECIALIZE(Advance); }
void ScannerStream::Seek(size_t pos) { SPECIALIZE(Seek, pos); }
size_t ScannerStream::pos() { return SPECIALIZE(pos); }
void ScannerStream::Back() { SPECIALIZE(Back); }
#undef SPECIALIZE
// Convenience overload: creates a stream over the whole string by passing 0
// and data->length() as the start and end positions of the ranged overload.
ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data) {
return ScannerStream::For(isolate, data, 0, data->length());
}
......@@ -380,21 +359,22 @@ ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
data = String::Flatten(isolate, data);
}
if (data->IsExternalOneByteString()) {
return new BufferedCharacterStream<ExternalStringStream>(
return new UnbufferedCharacterStream<uint8_t, ExternalStringStream>(
static_cast<size_t>(start_pos),
ExternalOneByteString::cast(*data)->GetChars() + start_offset,
static_cast<size_t>(end_pos));
} else if (data->IsExternalTwoByteString()) {
return new UnbufferedCharacterStream<ExternalStringStream>(
return new UnbufferedCharacterStream<uint16_t, ExternalStringStream>(
static_cast<size_t>(start_pos),
ExternalTwoByteString::cast(*data)->GetChars() + start_offset,
static_cast<size_t>(end_pos));
} else if (data->IsSeqOneByteString()) {
return new BufferedCharacterStream<OnHeapStream>(
static_cast<size_t>(start_pos), Handle<SeqOneByteString>::cast(data),
start_offset, static_cast<size_t>(end_pos));
return new RelocatingCharacterStream<uint8_t>(
isolate, static_cast<size_t>(start_pos),
Handle<SeqOneByteString>::cast(data), start_offset,
static_cast<size_t>(end_pos));
} else if (data->IsSeqTwoByteString()) {
return new RelocatingCharacterStream(
return new RelocatingCharacterStream<uint16_t>(
isolate, static_cast<size_t>(start_pos),
Handle<SeqTwoByteString>::cast(data), start_offset,
static_cast<size_t>(end_pos));
......@@ -403,15 +383,15 @@ ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
}
}
// Test helper: wraps a NUL-terminated C string, using strlen() to determine
// its length.
std::unique_ptr<CharacterStream<uint16_t>> ScannerStream::ForTesting(
std::unique_ptr<CharacterStream<uint8_t>> ScannerStream::ForTesting(
const char* data) {
return ScannerStream::ForTesting(data, strlen(data));
}
// Test helper: wraps the `length` bytes at `data`, reinterpreted as uint8_t
// characters, in an ExternalStringStream-backed character stream that starts
// at position 0. Ownership of the new stream is returned to the caller.
std::unique_ptr<CharacterStream<uint16_t>> ScannerStream::ForTesting(
std::unique_ptr<CharacterStream<uint8_t>> ScannerStream::ForTesting(
const char* data, size_t length) {
return std::unique_ptr<CharacterStream<uint16_t>>(
new BufferedCharacterStream<ExternalStringStream>(
return std::unique_ptr<CharacterStream<uint8_t>>(
new UnbufferedCharacterStream<uint8_t, ExternalStringStream>(
static_cast<size_t>(0), reinterpret_cast<const uint8_t*>(data),
static_cast<size_t>(length)));
}
......@@ -422,13 +402,13 @@ ScannerStream* ScannerStream::For(
RuntimeCallStats* stats) {
switch (encoding) {
case v8::ScriptCompiler::StreamedSource::TWO_BYTE:
return new UnbufferedCharacterStream<ChunkedStream>(
return new UnbufferedCharacterStream<uint16_t, ChunkedStream>(
static_cast<size_t>(0), source_stream, stats);
case v8::ScriptCompiler::StreamedSource::ONE_BYTE:
return new BufferedCharacterStream<ChunkedStream>(static_cast<size_t>(0),
source_stream, stats);
return new UnbufferedCharacterStream<uint8_t, ChunkedStream>(
static_cast<size_t>(0), source_stream, stats);
case v8::ScriptCompiler::StreamedSource::UTF8:
return new UnbufferedCharacterStream<Utf8ChunkedStream>(
return new UnbufferedCharacterStream<uint16_t, Utf8ChunkedStream>(
static_cast<size_t>(0), source_stream, stats);
}
UNREACHABLE();
......
......@@ -32,19 +32,27 @@ class V8_EXPORT_PRIVATE ScannerStream {
RuntimeCallStats* stats);
// For testing:
static std::unique_ptr<CharacterStream<uint16_t>> ForTesting(
const char* data);
static std::unique_ptr<CharacterStream<uint16_t>> ForTesting(const char* data,
static std::unique_ptr<CharacterStream<uint8_t>> ForTesting(const char* data);
static std::unique_ptr<CharacterStream<uint8_t>> ForTesting(const char* data,
size_t length);
// Returns true if the stream could access the V8 heap after construction.
virtual bool can_access_heap() = 0;
virtual uc32 Advance() = 0;
virtual void Seek(size_t pos) = 0;
virtual size_t pos() const = 0;
virtual void Back() = 0;
uc32 Advance();
void Seek(size_t pos);
size_t pos();
void Back();
void Back2();
virtual ~ScannerStream() {}
bool is_two_byte() const { return is_two_byte_; }
protected:
explicit ScannerStream(bool is_two_byte) : is_two_byte_(is_two_byte) {}
private:
const bool is_two_byte_;
};
template <typename Char>
......@@ -52,7 +60,7 @@ class CharacterStream : public ScannerStream {
public:
// Returns and advances past the next UTF-16 code unit in the input
// stream. If there are no more code units it returns kEndOfInput.
inline uc32 Advance() final {
inline uc32 Advance() {
uc32 result = Peek();
buffer_cursor_++;
return result;
......@@ -95,7 +103,7 @@ class CharacterStream : public ScannerStream {
// Go back one by one character in the input stream.
// This undoes the most recent Advance().
inline void Back() final {
inline void Back() {
// The common case - if the previous character is within
// buffer_start_ .. buffer_end_ - will be handled locally.
// Otherwise, a new block is requested.
......@@ -106,11 +114,11 @@ class CharacterStream : public ScannerStream {
}
}
// Returns the current stream position: buffer_pos_ plus the cursor's offset
// from the start of the current buffer.
inline size_t pos() const final {
inline size_t pos() const {
return buffer_pos_ + (buffer_cursor_ - buffer_start_);
}
inline void Seek(size_t pos) final {
inline void Seek(size_t pos) {
if (V8_LIKELY(pos >= buffer_pos_ &&
pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
......@@ -119,13 +127,11 @@ class CharacterStream : public ScannerStream {
}
}
// Returns true if the stream could access the V8 heap after construction.
virtual bool can_access_heap() = 0;
protected:
CharacterStream(const uint16_t* buffer_start, const uint16_t* buffer_cursor,
const uint16_t* buffer_end, size_t buffer_pos)
: buffer_start_(buffer_start),
CharacterStream(const Char* buffer_start, const Char* buffer_cursor,
const Char* buffer_end, size_t buffer_pos)
: ScannerStream(sizeof(Char) == 2),
buffer_start_(buffer_start),
buffer_cursor_(buffer_cursor),
buffer_end_(buffer_end),
buffer_pos_(buffer_pos) {}
......
......@@ -11,8 +11,9 @@
namespace v8 {
namespace internal {
template <typename Char>
V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
int start_position = source_pos();
int start_position = SourcePos<Char>();
while (true) {
// We won't skip behind the end of input.
......@@ -25,11 +26,11 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
} else if (!unicode_cache_->IsWhiteSpace(c0_)) {
break;
}
Advance();
Advance<Char>();
}
// Return whether or not we skipped any characters.
if (source_pos() == start_position) {
if (SourcePos<Char>() == start_position) {
DCHECK_NE('0', c0_);
return Token::ILLEGAL;
}
......
......@@ -170,33 +170,29 @@ bool Scanner::BookmarkScope::HasBeenApplied() {
// ----------------------------------------------------------------------------
// Scanner
// Constructs a scanner. In the form that takes `source` and `is_module`, the
// stream and module flag are stored at construction time and
// has_line_terminator_before_next_ starts out true.
Scanner::Scanner(UnicodeCache* unicode_cache)
: unicode_cache_(unicode_cache),
// Invokes the member function template `Call`, instantiated for uint16_t when
// source_->is_two_byte() is true and for uint8_t otherwise.
#define SPECIALIZE(Call, ...) \
(source_->is_two_byte() ? Call<uint16_t>(__VA_ARGS__) \
: Call<uint8_t>(__VA_ARGS__))
Scanner::Scanner(UnicodeCache* unicode_cache, ScannerStream* source,
bool is_module)
: is_module_(is_module),
unicode_cache_(unicode_cache),
source_(source),
octal_pos_(Location::invalid()),
octal_message_(MessageTemplate::kNone),
has_line_terminator_before_next_(false),
has_line_terminator_before_next_(true),
has_multiline_comment_before_next_(false),
has_line_terminator_after_next_(false),
found_html_comment_(false),
allow_harmony_bigint_(false),
allow_harmony_numeric_separator_(false) {}
// Binds the scanner to `source` (DCHECKed to be non-null) and records
// `is_module`, then calls Init(), marks a line terminator as preceding the
// next token and calls Scan().
void Scanner::Initialize(CharacterStream<uint16_t>* source, bool is_module) {
DCHECK_NOT_NULL(source);
source_ = source;
is_module_ = is_module;
// Need to capture identifiers in order to recognize "get" and "set"
// in object literals.
Init();
has_line_terminator_before_next_ = true;
Scan();
}
template <bool capture_raw, bool unicode>
template <typename Char, bool capture_raw, bool unicode>
uc32 Scanner::ScanHexNumber(int expected_length) {
DCHECK_LE(expected_length, 4); // prevent overflow
int begin = source_pos() - 2;
int begin = SourcePos<Char>() - 2;
uc32 x = 0;
for (int i = 0; i < expected_length; i++) {
int d = HexValue(c0_);
......@@ -208,13 +204,13 @@ uc32 Scanner::ScanHexNumber(int expected_length) {
return -1;
}
x = x * 16 + d;
Advance<capture_raw>();
Advance<Char, capture_raw>();
}
return x;
}
template <bool capture_raw>
template <typename Char, bool capture_raw>
uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
uc32 x = 0;
int d = HexValue(c0_);
......@@ -223,11 +219,11 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
while (d >= 0) {
x = x * 16 + d;
if (x > max_value) {
ReportScannerError(Location(beg_pos, source_pos() + 1),
ReportScannerError(Location(beg_pos, SourcePos<Char>() + 1),
MessageTemplate::kUndefinedUnicodeCodePoint);
return -1;
}
Advance<capture_raw>();
Advance<Char, capture_raw>();
d = HexValue(c0_);
}
......@@ -387,7 +383,7 @@ Token::Value Scanner::Next() {
}
has_line_terminator_before_next_ = false;
has_multiline_comment_before_next_ = false;
Scan();
SPECIALIZE(Scan);
return current_.token;
}
......@@ -413,47 +409,51 @@ Token::Value Scanner::PeekAhead() {
return ret;
}
// Handles a single-line HTML-style comment. When is_module_ is set, reports
// kHtmlCommentInModule at the current source position and returns
// Token::ILLEGAL; otherwise delegates to SkipSingleLineComment.
template <typename Char>
Token::Value Scanner::SkipSingleHTMLComment() {
if (is_module_) {
ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
ReportScannerError(SourcePos<Char>(),
MessageTemplate::kHtmlCommentInModule);
return Token::ILLEGAL;
}
return SkipSingleLineComment();
return SkipSingleLineComment<Char>();
}
// Skips a single-line comment by advancing until the predicate reports a line
// terminator. Always returns Token::WHITESPACE.
template <typename Char>
Token::Value Scanner::SkipSingleLineComment() {
// The line terminator at the end of the line is not considered
// to be part of the single-line comment; it is recognized
// separately by the lexical grammar and becomes part of the
// stream of input elements for the syntactic grammar (see
// ECMA-262, section 7.4).
AdvanceUntil([](uc32 c0_) { return unibrow::IsLineTerminator(c0_); });
AdvanceUntil<Char>([](uc32 c0_) { return unibrow::IsLineTerminator(c0_); });
return Token::WHITESPACE;
}
// Attempts to parse a source-URL magic comment, then skips whatever remains
// of the line, stopping at a line terminator or end of input. Always returns
// Token::WHITESPACE.
template <typename Char>
Token::Value Scanner::SkipSourceURLComment() {
TryToParseSourceURLComment();
TryToParseSourceURLComment<Char>();
// Consume any characters the magic-comment parser left on this line.
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
Advance();
Advance<Char>();
}
return Token::WHITESPACE;
}
template <typename Char>
void Scanner::TryToParseSourceURLComment() {
// Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
// function will just return if it cannot parse a magic comment.
DCHECK(!unicode_cache_->IsWhiteSpaceOrLineTerminator(kEndOfInput));
if (!unicode_cache_->IsWhiteSpace(c0_)) return;
Advance();
Advance<Char>();
LiteralBuffer name;
while (c0_ != kEndOfInput &&
!unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {
name.AddChar(c0_);
Advance();
Advance<Char>();
}
if (!name.is_one_byte()) return;
Vector<const uint8_t> name_literal = name.one_byte_literal();
......@@ -467,10 +467,10 @@ void Scanner::TryToParseSourceURLComment() {
}
if (c0_ != '=')
return;
Advance();
Advance<Char>();
value->Reset();
while (unicode_cache_->IsWhiteSpace(c0_)) {
Advance();
Advance<Char>();
}
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
// Disallowed characters.
......@@ -482,7 +482,7 @@ void Scanner::TryToParseSourceURLComment() {
break;
}
value->AddChar(c0_);
Advance();
Advance<Char>();
}
// Allow whitespace at the end.
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
......@@ -490,13 +490,14 @@ void Scanner::TryToParseSourceURLComment() {
value->Reset();
break;
}
Advance();
Advance<Char>();
}
}
template <typename Char>
Token::Value Scanner::SkipMultiLineComment() {
DCHECK_EQ(c0_, '*');
Advance();
Advance<Char>();
while (c0_ != kEndOfInput) {
DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
......@@ -507,33 +508,35 @@ Token::Value Scanner::SkipMultiLineComment() {
}
while (V8_UNLIKELY(c0_ == '*')) {
Advance();
Advance<Char>();
if (c0_ == '/') {
Advance();
Advance<Char>();
return Token::WHITESPACE;
}
}
Advance();
Advance<Char>();
}
// Unterminated multi-line comment.
return Token::ILLEGAL;
}
// Called with c0_ == '!'. If the next two characters are "--", records that an
// HTML comment was found and handles the rest as a single-line HTML comment;
// otherwise restores the '!' and returns Token::LT.
template <typename Char>
Token::Value Scanner::ScanHtmlComment() {
// Check for <!-- comments.
DCHECK_EQ(c0_, '!');
Advance();
if (c0_ != '-' || Peek() != '-') {
PushBack('!'); // undo Advance()
Advance<Char>();
if (c0_ != '-' || Peek<Char>() != '-') {
PushBack<Char>('!'); // undo Advance()
return Token::LT;
}
Advance();
Advance<Char>();
found_html_comment_ = true;
return SkipSingleHTMLComment();
return SkipSingleHTMLComment<Char>();
}
template <typename Char>
void Scanner::Scan() {
next_.literal_chars = nullptr;
next_.raw_literal_chars = nullptr;
......@@ -544,34 +547,34 @@ void Scanner::Scan() {
if (static_cast<unsigned>(c0_) <= 0x7F) {
Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
if (token != Token::ILLEGAL) {
int pos = source_pos();
int pos = SourcePos<Char>();
next_.token = token;
next_.contextual_token = Token::UNINITIALIZED;
next_.location.beg_pos = pos;
next_.location.end_pos = pos + 1;
Advance();
Advance<Char>();
return;
}
}
// Remember the position of the next token
next_.location.beg_pos = source_pos();
next_.location.beg_pos = SourcePos<Char>();
switch (c0_) {
case '"':
case '\'':
token = ScanString();
token = ScanString<Char>();
break;
case '<':
// < <= << <<= <!--
Advance();
Advance<Char>();
if (c0_ == '=') {
token = Select(Token::LTE);
token = Select<Char>(Token::LTE);
} else if (c0_ == '<') {
token = Select('=', Token::ASSIGN_SHL, Token::SHL);
token = Select<Char>('=', Token::ASSIGN_SHL, Token::SHL);
} else if (c0_ == '!') {
token = ScanHtmlComment();
token = ScanHtmlComment<Char>();
} else {
token = Token::LT;
}
......@@ -579,16 +582,16 @@ void Scanner::Scan() {
case '>':
// > >= >> >>= >>> >>>=
Advance();
Advance<Char>();
if (c0_ == '=') {
token = Select(Token::GTE);
token = Select<Char>(Token::GTE);
} else if (c0_ == '>') {
// >> >>= >>> >>>=
Advance();
Advance<Char>();
if (c0_ == '=') {
token = Select(Token::ASSIGN_SAR);
token = Select<Char>(Token::ASSIGN_SAR);
} else if (c0_ == '>') {
token = Select('=', Token::ASSIGN_SHR, Token::SHR);
token = Select<Char>('=', Token::ASSIGN_SHR, Token::SHR);
} else {
token = Token::SAR;
}
......@@ -599,11 +602,11 @@ void Scanner::Scan() {
case '=':
// = == === =>
Advance();
Advance<Char>();
if (c0_ == '=') {
token = Select('=', Token::EQ_STRICT, Token::EQ);
token = Select<Char>('=', Token::EQ_STRICT, Token::EQ);
} else if (c0_ == '>') {
token = Select(Token::ARROW);
token = Select<Char>(Token::ARROW);
} else {
token = Token::ASSIGN;
}
......@@ -611,9 +614,9 @@ void Scanner::Scan() {
case '!':
// ! != !==
Advance();
Advance<Char>();
if (c0_ == '=') {
token = Select('=', Token::NE_STRICT, Token::NE);
token = Select<Char>('=', Token::NE_STRICT, Token::NE);
} else {
token = Token::NOT;
}
......@@ -621,11 +624,11 @@ void Scanner::Scan() {
case '+':
// + ++ +=
Advance();
Advance<Char>();
if (c0_ == '+') {
token = Select(Token::INC);
token = Select<Char>(Token::INC);
} else if (c0_ == '=') {
token = Select(Token::ASSIGN_ADD);
token = Select<Char>(Token::ASSIGN_ADD);
} else {
token = Token::ADD;
}
......@@ -633,18 +636,18 @@ void Scanner::Scan() {
case '-':
// - -- --> -=
Advance();
Advance<Char>();
if (c0_ == '-') {
Advance();
Advance<Char>();
if (c0_ == '>' && HasAnyLineTerminatorBeforeNext()) {
// For compatibility with SpiderMonkey, we skip lines that
// start with an HTML comment end '-->'.
token = SkipSingleHTMLComment();
token = SkipSingleHTMLComment<Char>();
} else {
token = Token::DEC;
}
} else if (c0_ == '=') {
token = Select(Token::ASSIGN_SUB);
token = Select<Char>(Token::ASSIGN_SUB);
} else {
token = Token::SUB;
}
......@@ -652,11 +655,11 @@ void Scanner::Scan() {
case '*':
// * *=
Advance();
Advance<Char>();
if (c0_ == '*') {
token = Select('=', Token::ASSIGN_EXP, Token::EXP);
token = Select<Char>('=', Token::ASSIGN_EXP, Token::EXP);
} else if (c0_ == '=') {
token = Select(Token::ASSIGN_MUL);
token = Select<Char>(Token::ASSIGN_MUL);
} else {
token = Token::MUL;
}
......@@ -664,25 +667,25 @@ void Scanner::Scan() {
case '%':
// % %=
token = Select('=', Token::ASSIGN_MOD, Token::MOD);
token = Select<Char>('=', Token::ASSIGN_MOD, Token::MOD);
break;
case '/':
// / // /* /=
Advance();
Advance<Char>();
if (c0_ == '/') {
Advance();
Advance<Char>();
if (c0_ == '#' || c0_ == '@') {
Advance();
token = SkipSourceURLComment();
Advance<Char>();
token = SkipSourceURLComment<Char>();
} else {
PushBack(c0_);
token = SkipSingleLineComment();
PushBack<Char>(c0_);
token = SkipSingleLineComment<Char>();
}
} else if (c0_ == '*') {
token = SkipMultiLineComment();
token = SkipMultiLineComment<Char>();
} else if (c0_ == '=') {
token = Select(Token::ASSIGN_DIV);
token = Select<Char>(Token::ASSIGN_DIV);
} else {
token = Token::DIV;
}
......@@ -690,11 +693,11 @@ void Scanner::Scan() {
case '&':
// & && &=
Advance();
Advance<Char>();
if (c0_ == '&') {
token = Select(Token::AND);
token = Select<Char>(Token::AND);
} else if (c0_ == '=') {
token = Select(Token::ASSIGN_BIT_AND);
token = Select<Char>(Token::ASSIGN_BIT_AND);
} else {
token = Token::BIT_AND;
}
......@@ -702,11 +705,11 @@ void Scanner::Scan() {
case '|':
// | || |=
Advance();
Advance<Char>();
if (c0_ == '|') {
token = Select(Token::OR);
token = Select<Char>(Token::OR);
} else if (c0_ == '=') {
token = Select(Token::ASSIGN_BIT_OR);
token = Select<Char>(Token::ASSIGN_BIT_OR);
} else {
token = Token::BIT_OR;
}
......@@ -714,48 +717,48 @@ void Scanner::Scan() {
case '^':
// ^ ^=
token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
token = Select<Char>('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
break;
case '.':
// . Number
Advance();
Advance<Char>();
if (IsDecimalDigit(c0_)) {
token = ScanNumber(true);
token = ScanNumber<Char>(true);
} else {
token = Token::PERIOD;
if (c0_ == '.') {
Advance();
Advance<Char>();
if (c0_ == '.') {
Advance();
Advance<Char>();
token = Token::ELLIPSIS;
} else {
PushBack('.');
PushBack<Char>('.');
}
}
}
break;
case '`':
token = ScanTemplateStart();
token = ScanTemplateStart<Char>();
break;
case '#':
token = ScanPrivateName();
token = ScanPrivateName<Char>();
break;
default:
if (unicode_cache_->IsIdentifierStart(c0_) ||
(CombineSurrogatePair() &&
(CombineSurrogatePair<Char>() &&
unicode_cache_->IsIdentifierStart(c0_))) {
token = ScanIdentifierOrKeyword();
token = ScanIdentifierOrKeyword<Char>();
} else if (IsDecimalDigit(c0_)) {
token = ScanNumber(false);
token = ScanNumber<Char>(false);
} else if (c0_ == kEndOfInput) {
token = Token::EOS;
} else {
token = SkipWhiteSpace();
if (token == Token::ILLEGAL) Advance();
token = SkipWhiteSpace<Char>();
if (token == Token::ILLEGAL) Advance<Char>();
}
break;
}
......@@ -764,7 +767,7 @@ void Scanner::Scan() {
// whitespace.
} while (token == Token::WHITESPACE);
next_.location.end_pos = source_pos();
next_.location.end_pos = SourcePos<Char>();
if (Token::IsContextualKeyword(token)) {
next_.token = Token::IDENTIFIER;
next_.contextual_token = token;
......@@ -834,32 +837,32 @@ void Scanner::SeekForward(int pos) {
// After this call, we will have the token at the given position as
// the "next" token. The "current" token will be invalid.
if (pos == next_.location.beg_pos) return;
int current_pos = source_pos();
int current_pos = SPECIALIZE(SourcePos);
DCHECK_EQ(next_.location.end_pos, current_pos);
// Positions inside the lookahead token aren't supported.
DCHECK(pos >= current_pos);
if (pos != current_pos) {
source_->Seek(pos);
Advance();
SPECIALIZE(Seek, pos);
SPECIALIZE(Advance);
// This function is only called to seek to the location
// of the end of a function (at the "}" token). It doesn't matter
// whether there was a line terminator in the part we skip.
has_line_terminator_before_next_ = false;
has_multiline_comment_before_next_ = false;
}
Scan();
SPECIALIZE(Scan);
}
template <bool capture_raw>
template <typename Char, bool capture_raw>
bool Scanner::ScanEscape() {
uc32 c = c0_;
Advance<capture_raw>();
Advance<Char, capture_raw>();
// Skip escaped newlines.
DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
if (!capture_raw && unibrow::IsLineTerminator(c)) {
// Allow escaped CR+LF newlines in multiline string literals.
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<Char>();
return true;
}
......@@ -873,7 +876,7 @@ bool Scanner::ScanEscape() {
case 'r' : c = '\r'; break;
case 't' : c = '\t'; break;
case 'u' : {
c = ScanUnicodeEscape<capture_raw>();
c = ScanUnicodeEscape<Char, capture_raw>();
if (c < 0) return false;
break;
}
......@@ -881,7 +884,7 @@ bool Scanner::ScanEscape() {
c = '\v';
break;
case 'x': {
c = ScanHexNumber<capture_raw>(2);
c = ScanHexNumber<Char, capture_raw>(2);
if (c < 0) return false;
break;
}
......@@ -893,7 +896,7 @@ bool Scanner::ScanEscape() {
case '5': // fall through
case '6': // fall through
case '7':
c = ScanOctalEscape<capture_raw>(c, 2);
c = ScanOctalEscape<Char, capture_raw>(c, 2);
break;
}
......@@ -902,7 +905,7 @@ bool Scanner::ScanEscape() {
return true;
}
template <bool capture_raw>
template <typename Char, bool capture_raw>
uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
uc32 x = c - '0';
int i = 0;
......@@ -912,7 +915,7 @@ uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
int nx = x * 8 + d;
if (nx >= 256) break;
x = nx;
Advance<capture_raw>();
Advance<Char, capture_raw>();
}
// Anything except '\0' is an octal escape sequence, illegal in strict mode.
// Remember the position of octal escape sequences so that an error
......@@ -920,62 +923,64 @@ uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
// We don't report the error immediately, because the octal escape can
// occur before the "use strict" directive.
if (c != '0' || i > 0 || c0_ == '8' || c0_ == '9') {
octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
octal_pos_ = Location(SourcePos<Char>() - i - 1, SourcePos<Char>() - 1);
octal_message_ = capture_raw ? MessageTemplate::kTemplateOctalLiteral
: MessageTemplate::kStrictOctalEscape;
}
return x;
}
// Scans a string literal. On entry c0_ holds the opening quote character;
// the same character terminates the literal.
// Returns Token::STRING after consuming the closing quote, or Token::ILLEGAL
// if end of input or a string-literal line terminator is reached first, or if
// an escape sequence fails to scan.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line followed by its Char-templated replacement.
template <typename Char>
Token::Value Scanner::ScanString() {
uc32 quote = c0_;
Advance(); // consume quote
Advance<Char>(); // consume quote
LiteralScope literal(this);
while (true) {
// Closing quote: finish the literal and step past the quote.
if (c0_ == quote) {
literal.Complete();
Advance();
Advance<Char>();
return Token::STRING;
}
// Unterminated literal.
if (c0_ == kEndOfInput || unibrow::IsStringLiteralLineTerminator(c0_)) {
return Token::ILLEGAL;
}
// Backslash: skip it and hand the rest of the escape to ScanEscape
// (capture_raw = false).
if (c0_ == '\\') {
Advance();
Advance<Char>();
// TODO(verwaest): Check whether we can remove the additional check.
if (c0_ == kEndOfInput || !ScanEscape<false>()) {
if (c0_ == kEndOfInput || !ScanEscape<Char, false>()) {
return Token::ILLEGAL;
}
continue;
}
// Ordinary character: append it to the literal and move on.
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
}
}
// Scans a private name, i.e. '#' followed by an identifier. c0_ must be '#'
// on entry.
// Returns Token::PRIVATE_NAME on success. Reports kInvalidOrUnexpectedToken
// and returns Token::ILLEGAL when allow_harmony_private_fields() is false or
// when the character after '#' is not an identifier start; also returns
// Token::ILLEGAL when the identifier scan itself yields Token::ILLEGAL.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line followed by its Char-templated replacement.
template <typename Char>
Token::Value Scanner::ScanPrivateName() {
if (!allow_harmony_private_fields()) {
ReportScannerError(source_pos(),
ReportScannerError(SourcePos<Char>(),
MessageTemplate::kInvalidOrUnexpectedToken);
return Token::ILLEGAL;
}
LiteralScope literal(this);
DCHECK_EQ(c0_, '#');
// The '#' itself is added to the literal.
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
DCHECK(!unicode_cache_->IsIdentifierStart(kEndOfInput));
if (!unicode_cache_->IsIdentifierStart(c0_)) {
// Step back one character before reporting the error position.
PushBack(c0_);
ReportScannerError(source_pos(),
PushBack<Char>(c0_);
ReportScannerError(SourcePos<Char>(),
MessageTemplate::kInvalidOrUnexpectedToken);
return Token::ILLEGAL;
}
// Reuse the identifier scanner, sharing the literal that already holds '#'.
Token::Value token = ScanIdentifierOrKeywordInner(&literal);
Token::Value token = ScanIdentifierOrKeywordInner<Char>(&literal);
return token == Token::ILLEGAL ? Token::ILLEGAL : Token::PRIVATE_NAME;
}
template <typename Char>
Token::Value Scanner::ScanTemplateSpan() {
// When scanning a TemplateSpan, we are looking for the following construct:
// TEMPLATE_SPAN ::
......@@ -1002,12 +1007,12 @@ Token::Value Scanner::ScanTemplateSpan() {
const bool capture_raw = true;
while (true) {
uc32 c = c0_;
Advance();
Advance<Char>();
if (c == '`') {
result = Token::TEMPLATE_TAIL;
break;
} else if (c == '$' && c0_ == '{') {
Advance(); // Consume '{'
Advance<Char>(); // Consume '{'
break;
} else if (c == '\\') {
DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
......@@ -1016,15 +1021,15 @@ Token::Value Scanner::ScanTemplateSpan() {
// The TV of LineContinuation :: \ LineTerminatorSequence is the empty
// code unit sequence.
uc32 lastChar = c0_;
Advance();
Advance<Char>();
if (lastChar == '\r') {
// Also skip \n.
if (c0_ == '\n') Advance();
if (c0_ == '\n') Advance<Char>();
lastChar = '\n';
}
if (capture_raw) AddRawLiteralChar(lastChar);
} else {
bool success = ScanEscape<capture_raw>();
bool success = ScanEscape<Char, capture_raw>();
USE(success);
DCHECK_EQ(!success, has_error());
// For templates, invalid escape sequence checking is handled in the
......@@ -1034,14 +1039,14 @@ Token::Value Scanner::ScanTemplateSpan() {
}
} else if (c < 0) {
// Unterminated template literal
PushBack(c);
PushBack<Char>(c);
break;
} else {
// The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
// The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
// consisting of the CV 0x000A.
if (c == '\r') {
if (c0_ == '\n') Advance(); // Skip \n
if (c0_ == '\n') Advance<Char>(); // Skip \n
c = '\n';
}
if (capture_raw) AddRawLiteralChar(c);
......@@ -1049,20 +1054,20 @@ Token::Value Scanner::ScanTemplateSpan() {
}
}
literal.Complete();
next_.location.end_pos = source_pos();
next_.location.end_pos = SourcePos<Char>();
next_.token = result;
next_.contextual_token = Token::UNINITIALIZED;
return result;
}
// Begins scanning a template literal. c0_ must be the opening backtick and
// no next_next_ token may be buffered. Records the start position in
// next_.location.beg_pos, consumes the backtick, and returns whatever
// ScanTemplateSpan produces for the first span.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line followed by its Char-templated replacement.
template <typename Char>
Token::Value Scanner::ScanTemplateStart() {
DCHECK_EQ(next_next_.token, Token::UNINITIALIZED);
DCHECK_EQ(c0_, '`');
next_.location.beg_pos = source_pos();
Advance(); // Consume `
return ScanTemplateSpan();
next_.location.beg_pos = SourcePos<Char>();
Advance<Char>(); // Consume `
return ScanTemplateSpan<Char>();
}
Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
......@@ -1078,6 +1083,7 @@ Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
return tmp;
}
template <typename Char>
bool Scanner::ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
bool is_check_first_digit) {
// we must have at least one digit after 'x'/'b'/'o'
......@@ -1086,9 +1092,9 @@ bool Scanner::ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
bool separator_seen = false;
while (predicate(c0_) || c0_ == '_') {
if (c0_ == '_') {
Advance();
Advance<Char>();
if (c0_ == '_') {
ReportScannerError(Location(source_pos(), source_pos() + 1),
ReportScannerError(Location(SourcePos<Char>(), SourcePos<Char>() + 1),
MessageTemplate::kContinuousNumericSeparator);
return false;
}
......@@ -1096,11 +1102,11 @@ bool Scanner::ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
continue;
}
separator_seen = false;
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
}
if (separator_seen) {
ReportScannerError(Location(source_pos(), source_pos() + 1),
ReportScannerError(Location(SourcePos<Char>(), SourcePos<Char>() + 1),
MessageTemplate::kTrailingNumericSeparator);
return false;
}
......@@ -1108,23 +1114,25 @@ bool Scanner::ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
return true;
}
// Appends a run of decimal digits starting at c0_ to the current literal.
// With allow_harmony_numeric_separator() enabled the work is delegated to
// ScanDigitsWithNumericSeparators (without the first-digit check) and its
// result is returned; otherwise this always returns true, even when no digit
// was consumed.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line followed by its Char-templated replacement.
template <typename Char>
bool Scanner::ScanDecimalDigits() {
if (allow_harmony_numeric_separator()) {
return ScanDigitsWithNumericSeparators(&IsDecimalDigit, false);
return ScanDigitsWithNumericSeparators<Char>(&IsDecimalDigit, false);
}
while (IsDecimalDigit(c0_)) {
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
}
return true;
}
template <typename Char>
bool Scanner::ScanDecimalAsSmiWithNumericSeparators(uint64_t* value) {
bool separator_seen = false;
while (IsDecimalDigit(c0_) || c0_ == '_') {
if (c0_ == '_') {
Advance();
Advance<Char>();
if (c0_ == '_') {
ReportScannerError(Location(source_pos(), source_pos() + 1),
ReportScannerError(Location(SourcePos<Char>(), SourcePos<Char>() + 1),
MessageTemplate::kContinuousNumericSeparator);
return false;
}
......@@ -1133,13 +1141,11 @@ bool Scanner::ScanDecimalAsSmiWithNumericSeparators(uint64_t* value) {
}
separator_seen = false;
*value = 10 * *value + (c0_ - '0');
uc32 first_char = c0_;
Advance();
AddLiteralChar(first_char);
AddLiteralCharAdvance<Char>();
}
if (separator_seen) {
ReportScannerError(Location(source_pos(), source_pos() + 1),
ReportScannerError(Location(SourcePos<Char>(), SourcePos<Char>() + 1),
MessageTemplate::kTrailingNumericSeparator);
return false;
}
......@@ -1147,23 +1153,23 @@ bool Scanner::ScanDecimalAsSmiWithNumericSeparators(uint64_t* value) {
return true;
}
// Scans a run of decimal digits, appending each to the current literal while
// accumulating its base-10 value into *value (the caller supplies the
// starting value). No overflow check on *value is performed in this function.
// With allow_harmony_numeric_separator() enabled the work is delegated to
// ScanDecimalAsSmiWithNumericSeparators; otherwise this always returns true.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line(s) followed by the Char-templated replacement. Here the
// three-line "save c0_, Advance, AddLiteralChar" sequence is replaced by a
// single AddLiteralCharAdvance<Char>() call.
template <typename Char>
bool Scanner::ScanDecimalAsSmi(uint64_t* value) {
if (allow_harmony_numeric_separator()) {
return ScanDecimalAsSmiWithNumericSeparators(value);
return ScanDecimalAsSmiWithNumericSeparators<Char>(value);
}
while (IsDecimalDigit(c0_)) {
*value = 10 * *value + (c0_ - '0');
uc32 first_char = c0_;
Advance();
AddLiteralChar(first_char);
AddLiteralCharAdvance<Char>();
}
return true;
}
template <typename Char>
bool Scanner::ScanBinaryDigits() {
if (allow_harmony_numeric_separator()) {
return ScanDigitsWithNumericSeparators(&IsBinaryDigit, true);
return ScanDigitsWithNumericSeparators<Char>(&IsBinaryDigit, true);
}
// we must have at least one binary digit after 'b'/'B'
......@@ -1172,14 +1178,15 @@ bool Scanner::ScanBinaryDigits() {
}
while (IsBinaryDigit(c0_)) {
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
}
return true;
}
template <typename Char>
bool Scanner::ScanOctalDigits() {
if (allow_harmony_numeric_separator()) {
return ScanDigitsWithNumericSeparators(&IsOctalDigit, true);
return ScanDigitsWithNumericSeparators<Char>(&IsOctalDigit, true);
}
// we must have at least one octal digit after 'o'/'O'
......@@ -1188,11 +1195,12 @@ bool Scanner::ScanOctalDigits() {
}
while (IsOctalDigit(c0_)) {
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
}
return true;
}
template <typename Char>
bool Scanner::ScanImplicitOctalDigits(int start_pos,
Scanner::NumberKind* kind) {
*kind = IMPLICIT_OCTAL;
......@@ -1205,17 +1213,18 @@ bool Scanner::ScanImplicitOctalDigits(int start_pos,
}
if (c0_ < '0' || '7' < c0_) {
// Octal literal finished.
octal_pos_ = Location(start_pos, source_pos());
octal_pos_ = Location(start_pos, SourcePos<Char>());
octal_message_ = MessageTemplate::kStrictOctalLiteral;
return true;
}
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
}
}
template <typename Char>
bool Scanner::ScanHexDigits() {
if (allow_harmony_numeric_separator()) {
return ScanDigitsWithNumericSeparators(&IsHexDigit, true);
return ScanDigitsWithNumericSeparators<Char>(&IsHexDigit, true);
}
// we must have at least one hex digit after 'x'/'X'
......@@ -1224,18 +1233,20 @@ bool Scanner::ScanHexDigits() {
}
while (IsHexDigit(c0_)) {
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
}
return true;
}
// Scans an optionally signed decimal integer (used for exponents): an
// optional '+' or '-' is appended to the literal, then at least one decimal
// digit is required. Returns false if no digit follows the optional sign;
// otherwise returns the result of ScanDecimalDigits.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line followed by its Char-templated replacement.
template <typename Char>
bool Scanner::ScanSignedInteger() {
if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance();
if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance<Char>();
// we must have at least one decimal digit after 'e'/'E'
if (!IsDecimalDigit(c0_)) return false;
return ScanDecimalDigits();
return ScanDecimalDigits<Char>();
}
template <typename Char>
Token::Value Scanner::ScanNumber(bool seen_period) {
DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
......@@ -1243,7 +1254,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
LiteralScope literal(this);
bool at_start = !seen_period;
int start_pos = source_pos(); // For reporting octal positions.
int start_pos = SourcePos<Char>(); // For reporting octal positions.
if (seen_period) {
// we have already seen a decimal point of the float
AddLiteralChar('.');
......@@ -1251,29 +1262,29 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
return Token::ILLEGAL;
}
// we know we have at least one digit
if (!ScanDecimalDigits()) return Token::ILLEGAL;
if (!ScanDecimalDigits<Char>()) return Token::ILLEGAL;
} else {
// if the first character is '0' we must check for octals and hex
if (c0_ == '0') {
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
// either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
// an octal number.
if (c0_ == 'x' || c0_ == 'X') {
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
kind = HEX;
if (!ScanHexDigits()) return Token::ILLEGAL;
if (!ScanHexDigits<Char>()) return Token::ILLEGAL;
} else if (c0_ == 'o' || c0_ == 'O') {
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
kind = OCTAL;
if (!ScanOctalDigits()) return Token::ILLEGAL;
if (!ScanOctalDigits<Char>()) return Token::ILLEGAL;
} else if (c0_ == 'b' || c0_ == 'B') {
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
kind = BINARY;
if (!ScanBinaryDigits()) return Token::ILLEGAL;
if (!ScanBinaryDigits<Char>()) return Token::ILLEGAL;
} else if ('0' <= c0_ && c0_ <= '7') {
kind = IMPLICIT_OCTAL;
if (!ScanImplicitOctalDigits(start_pos, &kind)) {
if (!ScanImplicitOctalDigits<Char>(start_pos, &kind)) {
return Token::ILLEGAL;
}
if (kind == DECIMAL_WITH_LEADING_ZERO) {
......@@ -1282,7 +1293,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
} else if (c0_ == '8' || c0_ == '9') {
kind = DECIMAL_WITH_LEADING_ZERO;
} else if (allow_harmony_numeric_separator() && c0_ == '_') {
ReportScannerError(Location(source_pos(), source_pos() + 1),
ReportScannerError(Location(SourcePos<Char>(), SourcePos<Char>() + 1),
MessageTemplate::kZeroDigitNumericSeparator);
return Token::ILLEGAL;
}
......@@ -1294,7 +1305,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
if (at_start) {
uint64_t value = 0;
// scan subsequent decimal digits
if (!ScanDecimalAsSmi(&value)) {
if (!ScanDecimalAsSmi<Char>(&value)) {
return Token::ILLEGAL;
}
......@@ -1305,21 +1316,21 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
literal.Complete();
if (kind == DECIMAL_WITH_LEADING_ZERO) {
octal_pos_ = Location(start_pos, source_pos());
octal_pos_ = Location(start_pos, SourcePos<Char>());
octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
}
return Token::SMI;
}
}
if (!ScanDecimalDigits()) return Token::ILLEGAL;
if (!ScanDecimalDigits<Char>()) return Token::ILLEGAL;
if (c0_ == '.') {
seen_period = true;
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
if (allow_harmony_numeric_separator() && c0_ == '_') {
return Token::ILLEGAL;
}
if (!ScanDecimalDigits()) return Token::ILLEGAL;
if (!ScanDecimalDigits<Char>()) return Token::ILLEGAL;
}
}
}
......@@ -1331,15 +1342,15 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
// For simplicity, use 4 bits per character to calculate the maximum
// allowed literal length.
static const int kMaxBigIntCharacters = BigInt::kMaxLengthBits / 4;
int length = source_pos() - start_pos - (kind != DECIMAL ? 2 : 0);
int length = SourcePos<Char>() - start_pos - (kind != DECIMAL ? 2 : 0);
if (length > kMaxBigIntCharacters) {
ReportScannerError(Location(start_pos, source_pos()),
ReportScannerError(Location(start_pos, SourcePos<Char>()),
MessageTemplate::kBigIntTooBig);
return Token::ILLEGAL;
}
is_bigint = true;
Advance();
Advance<Char>();
} else if (c0_ == 'e' || c0_ == 'E') {
// scan exponent, if any
DCHECK(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
......@@ -1348,9 +1359,9 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
return Token::ILLEGAL;
// scan exponent
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
if (!ScanSignedInteger()) return Token::ILLEGAL;
if (!ScanSignedInteger<Char>()) return Token::ILLEGAL;
}
// The source character immediately following a numeric literal must
......@@ -1364,40 +1375,39 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
literal.Complete();
if (kind == DECIMAL_WITH_LEADING_ZERO) {
octal_pos_ = Location(start_pos, source_pos());
octal_pos_ = Location(start_pos, SourcePos<Char>());
octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
}
return is_bigint ? Token::BIGINT : Token::NUMBER;
}
// Decodes a unicode escape inside an identifier. The first Advance steps past
// the current character (callers in this listing invoke it with c0_ == '\\');
// the next character must be 'u', otherwise -1 is returned. After consuming
// the 'u' the remainder is decoded by ScanUnicodeEscape with
// capture_raw = false, and its result is returned unchanged.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line followed by its Char-templated replacement.
template <typename Char>
uc32 Scanner::ScanIdentifierUnicodeEscape() {
Advance();
Advance<Char>();
if (c0_ != 'u') return -1;
Advance();
return ScanUnicodeEscape<false>();
Advance<Char>();
return ScanUnicodeEscape<Char, false>();
}
// Decodes the body of a unicode escape after "\u" has been consumed.
// Returns the decoded code point, or -1 after reporting
// kInvalidUnicodeEscapeSequence when the braced form is malformed. For the
// fixed-width form the result of ScanHexNumber(4) is returned as-is.
// When capture_raw is true, consumed characters are also recorded in the raw
// literal by Advance.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line followed by its Char-templated replacement (including the
// template header itself).
template <bool capture_raw>
template <typename Char, bool capture_raw>
uc32 Scanner::ScanUnicodeEscape() {
// Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
// hex digits between { } is arbitrary. \ and u have already been read.
if (c0_ == '{') {
// Start of the escape for error reporting: back up over the "\u".
int begin = source_pos() - 2;
Advance<capture_raw>();
uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10FFFF, begin);
int begin = SourcePos<Char>() - 2;
Advance<Char, capture_raw>();
uc32 cp = ScanUnlimitedLengthHexNumber<Char, capture_raw>(0x10FFFF, begin);
// Either the hex scan failed or the closing brace is missing.
if (cp < 0 || c0_ != '}') {
ReportScannerError(source_pos(),
ReportScannerError(SourcePos<Char>(),
MessageTemplate::kInvalidUnicodeEscapeSequence);
return -1;
}
// Consume the closing '}'.
Advance<capture_raw>();
Advance<Char, capture_raw>();
return cp;
}
// Fixed-width form: exactly four hex digits.
const bool unicode = true;
return ScanHexNumber<capture_raw, unicode>(4);
return ScanHexNumber<Char, capture_raw, unicode>(4);
}
......@@ -1535,25 +1545,21 @@ static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
#undef KEYWORD_GROUP_CASE
}
Token::Value Scanner::ScanIdentifierOrKeyword() {
LiteralScope literal(this);
return ScanIdentifierOrKeywordInner(&literal);
}
template <typename Char>
Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
DCHECK(unicode_cache_->IsIdentifierStart(c0_));
bool escaped = false;
if (IsInRange(c0_, 'a', 'z') || c0_ == '_') {
do {
AddLiteralChar(static_cast<char>(c0_));
Advance();
Advance<Char>();
} while (IsInRange(c0_, 'a', 'z') || c0_ == '_');
if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
// Identifier starting with lowercase or _.
do {
AddLiteralChar(static_cast<char>(c0_));
Advance();
Advance<Char>();
} while (IsAsciiIdentifier(c0_));
if (c0_ <= kMaxAscii && c0_ != '\\') {
......@@ -1574,7 +1580,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
} else if (IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
do {
AddLiteralChar(static_cast<char>(c0_));
Advance();
Advance<Char>();
} while (IsAsciiIdentifier(c0_));
if (c0_ <= kMaxAscii && c0_ != '\\') {
......@@ -1583,7 +1589,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
}
} else if (c0_ == '\\') {
escaped = true;
uc32 c = ScanIdentifierUnicodeEscape();
uc32 c = ScanIdentifierUnicodeEscape<Char>();
DCHECK(!unicode_cache_->IsIdentifierStart(-1));
if (c == '\\' || !unicode_cache_->IsIdentifierStart(c)) {
return Token::ILLEGAL;
......@@ -1594,7 +1600,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
while (true) {
if (c0_ == '\\') {
escaped = true;
uc32 c = ScanIdentifierUnicodeEscape();
uc32 c = ScanIdentifierUnicodeEscape<Char>();
// Only allow legal identifier part characters.
// TODO(verwaest): Make this true.
// DCHECK(!unicode_cache_->IsIdentifierPart('\\'));
......@@ -1604,9 +1610,9 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
}
AddLiteralChar(c);
} else if (unicode_cache_->IsIdentifierPart(c0_) ||
(CombineSurrogatePair() &&
(CombineSurrogatePair<Char>() &&
unicode_cache_->IsIdentifierPart(c0_))) {
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
} else {
break;
}
......@@ -1640,6 +1646,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
return Token::IDENTIFIER;
}
template <typename Char>
bool Scanner::ScanRegExpPattern() {
DCHECK(next_next_.token == Token::UNINITIALIZED);
DCHECK(next_.token == Token::DIV || next_.token == Token::ASSIGN_DIV);
......@@ -1650,8 +1657,8 @@ bool Scanner::ScanRegExpPattern() {
// Previous token is either '/' or '/=', in the second case, the
// pattern starts at =.
next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
next_.location.beg_pos = SourcePos<Char>() - (seen_equal ? 2 : 1);
next_.location.end_pos = SourcePos<Char>() - (seen_equal ? 1 : 0);
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
// the scanner should pass uninterpreted bodies to the RegExp
......@@ -1666,11 +1673,11 @@ bool Scanner::ScanRegExpPattern() {
return false;
}
if (c0_ == '\\') { // Escape sequence.
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
return false;
}
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
// If the escape allows more characters, i.e., \x??, \u????, or \c?,
// only "safe" characters are allowed (letters, digits, underscore),
// otherwise the escape isn't valid and the invalid character has
......@@ -1684,10 +1691,10 @@ bool Scanner::ScanRegExpPattern() {
} else { // Unescaped character.
if (c0_ == '[') in_character_class = true;
if (c0_ == ']') in_character_class = false;
AddLiteralCharAdvance();
AddLiteralCharAdvance<Char>();
}
}
Advance(); // consume '/'
Advance<Char>(); // consume '/'
literal.Complete();
next_.token = Token::REGEXP_LITERAL;
......@@ -1695,7 +1702,10 @@ bool Scanner::ScanRegExpPattern() {
return true;
}
template bool Scanner::ScanRegExpPattern<uint8_t>();
template bool Scanner::ScanRegExpPattern<uint16_t>();
template <typename Char>
Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
DCHECK(next_.token == Token::REGEXP_LITERAL);
......@@ -1728,13 +1738,15 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
if (flags & flag) {
return Nothing<RegExp::Flags>();
}
Advance();
Advance<Char>();
flags |= flag;
}
next_.location.end_pos = source_pos();
next_.location.end_pos = SourcePos<Char>();
return Just(RegExp::Flags(flags));
}
template Maybe<RegExp::Flags> Scanner::ScanRegExpFlags<uint8_t>();
template Maybe<RegExp::Flags> Scanner::ScanRegExpFlags<uint16_t>();
const AstRawString* Scanner::CurrentSymbol(
AstValueFactory* ast_value_factory) const {
......@@ -1808,12 +1820,14 @@ void Scanner::SeekNext(size_t position) {
next_next_.token = Token::UNINITIALIZED;
next_next_.contextual_token = Token::UNINITIALIZED;
// 2, reset the source to the desired position,
source_->Seek(position);
SPECIALIZE(Seek, position);
// 3, re-scan, by scanning the look-ahead char + 1 token (next_).
c0_ = source_->Advance();
SPECIALIZE(Advance);
Next();
DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));
}
#undef SPECIALIZE
} // namespace internal
} // namespace v8
......@@ -78,9 +78,8 @@ class Scanner {
static const int kNoOctalLocation = -1;
static const uc32 kEndOfInput = ScannerStream::kEndOfInput;
explicit Scanner(UnicodeCache* scanner_contants);
void Initialize(CharacterStream<uint16_t>* source, bool is_module);
Scanner(UnicodeCache* scanner_contants, ScannerStream* source,
bool is_module);
// Returns the next token and advances input.
Token::Value Next();
......@@ -210,18 +209,18 @@ class Scanner {
return has_line_terminator_after_next_;
}
#define SPECIALIZE(Call) \
(source_->is_two_byte() ? Call<uint16_t>() : Call<uint8_t>())
// Scans the input as a regular expression pattern, next token must be /(=).
// Returns true if a pattern is scanned.
bool ScanRegExpPattern();
bool ScanRegExpPattern() { return SPECIALIZE(ScanRegExpPattern); }
// Scans the input as regular expression flags. Returns the flags on success.
Maybe<RegExp::Flags> ScanRegExpFlags();
Maybe<RegExp::Flags> ScanRegExpFlags() { return SPECIALIZE(ScanRegExpFlags); }
// Scans the input as a template literal
Token::Value ScanTemplateStart();
Token::Value ScanTemplateStart() { return SPECIALIZE(ScanTemplateStart); }
Token::Value ScanTemplateContinuation() {
DCHECK_EQ(next_.token, Token::RBRACE);
next_.location.beg_pos = source_pos() - 1; // We already consumed }
return ScanTemplateSpan();
return SPECIALIZE(ScanTemplateContinuation);
}
Handle<String> SourceUrl(Isolate* isolate) const;
......@@ -244,6 +243,34 @@ class Scanner {
allow_harmony_numeric_separator_ = allow;
}
// Call this after setting source_ to the input.
// Resets the three token descriptors (current_, next_, next_next_) to
// UNINITIALIZED with null literal buffers and no template-escape message,
// clears found_html_comment_ and scanner_error_, then loads the first
// lookahead character into c0_ and scans the first token.
// SPECIALIZE selects the uint16_t or uint8_t instantiation of the named
// member template based on source_->is_two_byte().
void Initialize() {
// Initialize current_ to not refer to a literal.
current_.token = Token::UNINITIALIZED;
current_.contextual_token = Token::UNINITIALIZED;
current_.literal_chars = nullptr;
current_.raw_literal_chars = nullptr;
current_.invalid_template_escape_message = MessageTemplate::kNone;
// Same reset for the one-token lookahead descriptor.
next_.token = Token::UNINITIALIZED;
next_.contextual_token = Token::UNINITIALIZED;
next_.literal_chars = nullptr;
next_.raw_literal_chars = nullptr;
next_.invalid_template_escape_message = MessageTemplate::kNone;
// Same reset for the two-token lookahead descriptor.
next_next_.token = Token::UNINITIALIZED;
next_next_.contextual_token = Token::UNINITIALIZED;
next_next_.literal_chars = nullptr;
next_next_.raw_literal_chars = nullptr;
next_next_.invalid_template_escape_message = MessageTemplate::kNone;
found_html_comment_ = false;
scanner_error_ = MessageTemplate::kNone;
// Set c0_ (one character ahead)
STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
SPECIALIZE(Advance);
// Scan the first token.
SPECIALIZE(Scan);
}
private:
// Scoped helper for saving & restoring scanner error state.
// This is used for tagged template literals, in which normally forbidden
......@@ -378,33 +405,10 @@ class Scanner {
const int kMaxAscii = 127;
// Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
template <bool capture_raw>
template <typename Char, bool capture_raw>
uc32 ScanOctalEscape(uc32 c, int length);
// Call this after setting source_ to the input.
void Init() {
// Set c0_ (one character ahead)
STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
Advance();
// Initialize current_ to not refer to a literal.
current_.token = Token::UNINITIALIZED;
current_.contextual_token = Token::UNINITIALIZED;
current_.literal_chars = nullptr;
current_.raw_literal_chars = nullptr;
current_.invalid_template_escape_message = MessageTemplate::kNone;
next_.token = Token::UNINITIALIZED;
next_.contextual_token = Token::UNINITIALIZED;
next_.literal_chars = nullptr;
next_.raw_literal_chars = nullptr;
next_.invalid_template_escape_message = MessageTemplate::kNone;
next_next_.token = Token::UNINITIALIZED;
next_next_.contextual_token = Token::UNINITIALIZED;
next_next_.literal_chars = nullptr;
next_next_.raw_literal_chars = nullptr;
next_next_.invalid_template_escape_message = MessageTemplate::kNone;
found_html_comment_ = false;
scanner_error_ = MessageTemplate::kNone;
}
#undef SPECIALIZE
void ReportScannerError(const Location& location,
MessageTemplate::Template error) {
......@@ -466,60 +470,76 @@ class Scanner {
next_.raw_literal_chars = nullptr;
}
template <typename Char>
inline void AddLiteralCharAdvance() {
AddLiteralChar(c0_);
Advance();
Advance<Char>();
}
// Returns source_ viewed as a CharacterStream<Char>. This is a static_cast
// with no runtime check in this function, so Char must match the stream's
// real character width.
// NOTE(review): callers presumably guarantee this by dispatching on
// source_->is_two_byte() (see the SPECIALIZE macro); confirm.
template <typename Char>
CharacterStream<Char>* Source() {
return static_cast<CharacterStream<Char>*>(source_);
}
template <typename Char>
void Seek(size_t pos) {
Source<Char>()->Seek(pos);
}
// Low-level scanning support.
// Moves the scanner forward by one character: c0_ is replaced by the next
// character from the source stream. When capture_raw is true, the character
// being left behind (the old c0_) is first appended to the raw literal.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line(s) followed by the Char-templated replacement (including
// the template header itself).
template <bool capture_raw = false>
template <typename Char, bool capture_raw = false>
void Advance() {
if (capture_raw) {
AddRawLiteralChar(c0_);
}
c0_ = source_->Advance();
if (capture_raw) AddRawLiteralChar(c0_);
c0_ = Source<Char>()->Advance();
}
template <typename FunctionType>
template <typename Char, typename FunctionType>
V8_INLINE void AdvanceUntil(FunctionType check) {
c0_ = source_->AdvanceUntil(check);
c0_ = Source<Char>()->AdvanceUntil(check);
}
// Tries to merge a UTF-16 surrogate pair starting at c0_ into one code point.
// If c0_ is a lead surrogate and the next code unit read from the stream is a
// trail surrogate, c0_ is replaced by the combined code point and true is
// returned (the trail unit stays consumed). If the next unit is not a trail
// surrogate the stream is stepped back by one and false is returned with c0_
// unchanged. For one-byte Char (sizeof(Char) == 1) this returns false
// immediately without touching the stream.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line followed by its Char-templated replacement.
template <typename Char>
bool CombineSurrogatePair() {
if (sizeof(Char) == 1) return false;
DCHECK(!unibrow::Utf16::IsLeadSurrogate(kEndOfInput));
if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
uc32 c1 = source_->Advance();
uc32 c1 = Source<Char>()->Advance();
DCHECK(!unibrow::Utf16::IsTrailSurrogate(kEndOfInput));
if (unibrow::Utf16::IsTrailSurrogate(c1)) {
c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
return true;
}
// Not a valid pair: undo the read of c1.
source_->Back();
Source<Char>()->Back();
}
return false;
}
template <typename Char>
void PushBack(uc32 ch) {
DCHECK_LE(c0_, static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode));
source_->Back();
c0_ = ch;
}
uc32 Peek() const { return source_->Peek(); }
template <typename Char>
uc32 Peek() {
return Source<Char>()->Peek();
}
template <typename Char>
inline Token::Value Select(Token::Value tok) {
Advance();
Advance<Char>();
return tok;
}
// Two-way token selection on the character after the current one.
// Consumes the current character; if the character now in c0_ equals `next`,
// consumes it as well and returns `then`. Otherwise returns `else_`, leaving
// that character in c0_.
// NOTE: in this diff listing each changed statement appears twice: the
// pre-change line(s) followed by the replacement. The old
// "else { return else_; }" branch is replaced by a plain trailing
// "return else_;".
template <typename Char>
inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
Advance();
Advance<Char>();
if (c0_ == next) {
Advance();
Advance<Char>();
return then;
} else {
return else_;
}
return else_;
}
// Returns the literal string, if any, for the current token (the
// token last returned by Next()). The string is 0-terminated.
......@@ -576,65 +596,111 @@ class Scanner {
return current_.raw_literal_chars->is_one_byte();
}
template <bool capture_raw, bool unicode = false>
template <typename Char, bool capture_raw, bool unicode = false>
uc32 ScanHexNumber(int expected_length);
// Scan a number of any length but not bigger than max_value. For example, the
// number can be 000000001, so it's very long in characters but its value is
// small.
template <bool capture_raw>
template <typename Char, bool capture_raw>
uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos);
// Scans a single JavaScript token.
template <typename Char>
void Scan();
template <typename Char>
V8_INLINE Token::Value SkipWhiteSpace();
template <typename Char>
Token::Value SkipSingleHTMLComment();
template <typename Char>
Token::Value SkipSingleLineComment();
template <typename Char>
Token::Value SkipSourceURLComment();
template <typename Char>
void TryToParseSourceURLComment();
template <typename Char>
Token::Value SkipMultiLineComment();
// Scans a possible HTML comment -- begins with '<!'.
template <typename Char>
Token::Value ScanHtmlComment();
template <typename Char>
bool ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
bool is_check_first_digit);
template <typename Char>
bool ScanDecimalDigits();
// Optimized function to scan decimal number as Smi.
template <typename Char>
bool ScanDecimalAsSmi(uint64_t* value);
template <typename Char>
bool ScanDecimalAsSmiWithNumericSeparators(uint64_t* value);
template <typename Char>
bool ScanHexDigits();
template <typename Char>
bool ScanBinaryDigits();
template <typename Char>
bool ScanSignedInteger();
template <typename Char>
bool ScanOctalDigits();
template <typename Char>
bool ScanImplicitOctalDigits(int start_pos, NumberKind* kind);
template <typename Char>
Token::Value ScanNumber(bool seen_period);
Token::Value ScanIdentifierOrKeyword();
template <typename Char>
inline Token::Value ScanIdentifierOrKeyword() {
LiteralScope literal(this);
return ScanIdentifierOrKeywordInner<Char>(&literal);
}
template <typename Char>
Token::Value ScanIdentifierOrKeywordInner(LiteralScope* literal);
template <typename Char>
Token::Value ScanString();
template <typename Char>
Token::Value ScanPrivateName();
// Scans an escape-sequence which is part of a string and adds the
// decoded character to the current literal. Returns true if a pattern
// is scanned.
template <bool capture_raw>
template <typename Char, bool capture_raw>
bool ScanEscape();
// Decodes a Unicode escape-sequence which is part of an identifier.
// If the escape sequence cannot be decoded the result is kBadChar.
template <typename Char>
uc32 ScanIdentifierUnicodeEscape();
// Helper for the above functions.
template <bool capture_raw>
template <typename Char, bool capture_raw>
uc32 ScanUnicodeEscape();
template <typename Char>
bool ScanRegExpPattern();
// Scans the input as regular expression flags. Returns the flags on success.
template <typename Char>
Maybe<RegExp::Flags> ScanRegExpFlags();
// Scans the input as a template literal
template <typename Char>
Token::Value ScanTemplateStart();
// Continues scanning a template literal once next_ holds a '}' token.
// DCHECKs that next_.token is Token::RBRACE, moves the recorded start of
// next_ back by one position to account for the already-consumed brace, and
// returns the token produced by ScanTemplateSpan<Char>().
template <typename Char>
Token::Value ScanTemplateContinuation() {
DCHECK_EQ(next_.token, Token::RBRACE);
next_.location.beg_pos = SourcePos<Char>() - 1; // We already consumed }
return ScanTemplateSpan<Char>();
}
bool is_module_;
template <typename Char>
Token::Value ScanTemplateSpan();
// Return the current source position.
int source_pos() {
return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
// Encoding-specific variant: reads pos() from the Char-typed source stream,
// casts it to int and subtracts kCharacterLookaheadBufferSize.
// NOTE(review): the subtraction presumably compensates for characters already
// pulled into the lookahead buffer -- confirm against the constant's
// definition.
template <typename Char>
int SourcePos() {
return static_cast<int>(Source<Char>()->pos()) -
kCharacterLookaheadBufferSize;
}
static bool LiteralContainsEscapes(const TokenDesc& token) {
......@@ -672,8 +738,8 @@ class Scanner {
TokenDesc next_; // desc for next token (one token look-ahead)
TokenDesc next_next_; // desc for the token after next (after PeekAhead())
// Input stream. Must be initialized to a CharacterStream.
CharacterStream<uint16_t>* source_;
// Input stream. Must be initialized to a ScannerStream.
ScannerStream* const source_;
// Last-seen positions of potentially problematic tokens.
Location octal_pos_;
......
......@@ -246,7 +246,7 @@ TEST(AdvanceMatchAdvanceUntil) {
nullptr));
int32_t au_c0 =
static_cast<i::CharacterStream<uint16_t>*>(stream_advance_until.get())
static_cast<i::CharacterStream<uint8_t>*>(stream_advance_until.get())
->AdvanceUntil(
[](int32_t c0) { return unibrow::IsLineTerminator(c0); });
......
......@@ -27,7 +27,7 @@ struct ScannerTestHelper {
scanner(std::move(other.scanner)) {}
std::unique_ptr<UnicodeCache> unicode_cache;
std::unique_ptr<CharacterStream<uint16_t>> stream;
std::unique_ptr<CharacterStream<uint8_t>> stream;
std::unique_ptr<Scanner> scanner;
Scanner* operator->() const { return scanner.get(); }
......@@ -38,9 +38,9 @@ ScannerTestHelper make_scanner(const char* src) {
ScannerTestHelper helper;
helper.unicode_cache = std::unique_ptr<UnicodeCache>(new UnicodeCache);
helper.stream = ScannerStream::ForTesting(src);
helper.scanner =
std::unique_ptr<Scanner>(new Scanner(helper.unicode_cache.get()));
helper.scanner->Initialize(helper.stream.get(), false);
helper.scanner = std::unique_ptr<Scanner>(
new Scanner(helper.unicode_cache.get(), helper.stream.get(), false));
helper.scanner->Initialize();
return helper;
}
......
......@@ -92,16 +92,16 @@ TEST(ScanKeywords) {
CHECK(static_cast<int>(sizeof(buffer)) >= length);
{
auto stream = i::ScannerStream::ForTesting(keyword, length);
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
CHECK_EQ(key_token.token, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
// Removing characters will make keyword matching fail.
{
auto stream = i::ScannerStream::ForTesting(keyword, length - 1);
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
......@@ -111,8 +111,8 @@ TEST(ScanKeywords) {
i::MemMove(buffer, keyword, length);
buffer[length] = chars_to_append[j];
auto stream = i::ScannerStream::ForTesting(buffer, length + 1);
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
......@@ -121,8 +121,8 @@ TEST(ScanKeywords) {
i::MemMove(buffer, keyword, length);
buffer[length - 1] = '_';
auto stream = i::ScannerStream::ForTesting(buffer, length);
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
......@@ -188,8 +188,8 @@ TEST(ScanHTMLEndComments) {
for (int i = 0; tests[i]; i++) {
const char* source = tests[i];
auto stream = i::ScannerStream::ForTesting(source);
i::Scanner scanner(i_isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Scanner scanner(i_isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Zone zone(i_isolate->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(&zone,
i_isolate->ast_string_constants(),
......@@ -207,8 +207,8 @@ TEST(ScanHTMLEndComments) {
for (int i = 0; fail_tests[i]; i++) {
const char* source = fail_tests[i];
auto stream = i::ScannerStream::ForTesting(source);
i::Scanner scanner(i_isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Scanner scanner(i_isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Zone zone(i_isolate->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(&zone,
i_isolate->ast_string_constants(),
......@@ -232,8 +232,8 @@ TEST(ScanHtmlComments) {
// Disallow HTML comments.
{
auto stream = i::ScannerStream::ForTesting(src);
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), true);
i::Scanner scanner(&unicode_cache, stream.get(), true);
scanner.Initialize();
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::ILLEGAL, scanner.Next());
}
......@@ -241,8 +241,8 @@ TEST(ScanHtmlComments) {
// Skip HTML comments:
{
auto stream = i::ScannerStream::ForTesting(src);
i::Scanner scanner(&unicode_cache);
scanner.Initialize(stream.get(), false);
i::Scanner scanner(&unicode_cache, stream.get(), false);
scanner.Initialize();
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
}
......@@ -280,8 +280,8 @@ TEST(StandAlonePreParser) {
uintptr_t stack_limit = i_isolate->stack_guard()->real_climit();
for (int i = 0; programs[i]; i++) {
auto stream = i::ScannerStream::ForTesting(programs[i]);
i::Scanner scanner(i_isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Scanner scanner(i_isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Zone zone(i_isolate->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(&zone,
......@@ -313,8 +313,8 @@ TEST(StandAlonePreParserNoNatives) {
uintptr_t stack_limit = isolate->stack_guard()->real_climit();
for (int i = 0; programs[i]; i++) {
auto stream = i::ScannerStream::ForTesting(programs[i]);
i::Scanner scanner(isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Scanner scanner(isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
// Preparser defaults to disallowing natives syntax.
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
......@@ -348,8 +348,8 @@ TEST(RegressChromium62639) {
// failed in debug mode, and sometimes crashed in release mode.
auto stream = i::ScannerStream::ForTesting(program);
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
&zone, CcTest::i_isolate()->ast_string_constants(),
......@@ -381,8 +381,8 @@ TEST(PreParseOverflow) {
uintptr_t stack_limit = isolate->stack_guard()->real_climit();
auto stream = i::ScannerStream::ForTesting(program.get(), kProgramSize);
i::Scanner scanner(isolate->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Scanner scanner(isolate->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
......@@ -396,12 +396,12 @@ TEST(PreParseOverflow) {
CHECK_EQ(i::PreParser::kPreParseStackOverflow, result);
}
void TestStreamScanner(i::CharacterStream<uint16_t>* stream,
void TestStreamScanner(i::ScannerStream* stream,
i::Token::Value* expected_tokens,
int skip_pos = 0, // Zero means not skipping.
int skip_to = 0) {
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
scanner.Initialize(stream, false);
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), stream, false);
scanner.Initialize();
int i = 0;
do {
......@@ -475,8 +475,8 @@ TEST(StreamScanner) {
void TestScanRegExp(const char* re_source, const char* expected) {
auto stream = i::ScannerStream::ForTesting(re_source);
i::HandleScope scope(CcTest::i_isolate());
i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
scanner.Initialize(stream.get(), false);
i::Scanner scanner(CcTest::i_isolate()->unicode_cache(), stream.get(), false);
scanner.Initialize();
i::Token::Value start = scanner.peek();
CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
......@@ -1141,8 +1141,6 @@ void SetParserFlags(i::PreParser* parser, i::EnumSet<ParserFlag> flags) {
parser->set_allow_natives(flags.Contains(kAllowNatives));
parser->set_allow_harmony_public_fields(
flags.Contains(kAllowHarmonyPublicFields));
parser->set_allow_harmony_private_fields(
flags.Contains(kAllowHarmonyPrivateFields));
parser->set_allow_harmony_static_fields(
flags.Contains(kAllowHarmonyStaticFields));
parser->set_allow_harmony_dynamic_import(
......@@ -1151,11 +1149,13 @@ void SetParserFlags(i::PreParser* parser, i::EnumSet<ParserFlag> flags) {
flags.Contains(kAllowHarmonyImportMeta));
parser->set_allow_harmony_do_expressions(
flags.Contains(kAllowHarmonyDoExpressions));
parser->set_allow_harmony_private_fields(
flags.Contains(kAllowHarmonyPrivateFields));
parser->set_allow_harmony_numeric_separator(
flags.Contains(kAllowHarmonyNumericSeparator));
}
template <typename Char>
void TestParserSyncWithFlags(i::Handle<i::String> source,
i::EnumSet<ParserFlag> flags,
ParserSyncTestResult result,
......@@ -1169,9 +1169,9 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
// Preparse the data.
i::PendingCompilationErrorHandler pending_error_handler;
if (test_preparser) {
i::Scanner scanner(isolate->unicode_cache());
std::unique_ptr<i::ScannerStream> stream(
i::ScannerStream::For(isolate, source));
i::Scanner scanner(isolate->unicode_cache(), stream.get(), is_module);
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
&zone, CcTest::i_isolate()->ast_string_constants(),
......@@ -1181,8 +1181,8 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
isolate->counters()->runtime_call_stats(),
isolate->logger(), -1, is_module);
SetParserFlags(&preparser, flags);
scanner.Initialize(static_cast<CharacterStream<Char>*>(stream.get()),
is_module);
scanner.Initialize();
// Make sure we scan the first token with the correct flags.
i::PreParser::PreParseResult result = preparser.PreParseProgram();
CHECK_EQ(i::PreParser::kPreParseSuccess, result);
}
......@@ -1292,15 +1292,8 @@ void TestParserSync(const char* source, const ParserFlag* varying_flags,
++flag_index) {
flags.Remove(always_false_flags[flag_index]);
}
if (str->IsSeqOneByteString()) {
// TODO(verwaest): Switch to uint8_t.
TestParserSyncWithFlags<uint16_t>(str, flags, result, is_module,
test_preparser, ignore_error_msg);
} else {
DCHECK(str->IsSeqTwoByteString());
TestParserSyncWithFlags<uint16_t>(str, flags, result, is_module,
test_preparser, ignore_error_msg);
}
TestParserSyncWithFlags(str, flags, result, is_module, test_preparser,
ignore_error_msg);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment