Commit 2d40e2f4, authored by Toon Verwaest, committed by Commit Bot

[scanner] Prepare CharacterStreams for specializing scanner and parser by character type

This templatizes CharacterStream by char type, and makes them subclass ScannerStream.
Methods that are widely used by tests are marked virtual on ScannerStream and final on
CharacterStream<T> so the specialized scanner will know what to call. ParseInfo passes
around ScannerStream, but the scanner requires the explicit CharacterStream<T>. Since
AdvanceUntil is templatized by FunctionType, I couldn't mark that virtual; so instead
I adjusted those tests to operate directly on ucs2 (not utf8 since we'll drop that in
the future).

In the end no functionality was changed. Some calls became virtual in tests. This is
mainly just preparation.

Change-Id: I0b4def65d3eb8fa5c806027c7e9123a590ebbdb5
Reviewed-on: https://chromium-review.googlesource.com/1156690
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54848}
parent 65e3cea3
...@@ -234,13 +234,13 @@ UnoptimizedCompilationJob::Status AsmJsCompilationJob::ExecuteJobImpl() { ...@@ -234,13 +234,13 @@ UnoptimizedCompilationJob::Status AsmJsCompilationJob::ExecuteJobImpl() {
Zone* compile_zone = compilation_info()->zone(); Zone* compile_zone = compilation_info()->zone();
Zone translate_zone(allocator_, ZONE_NAME); Zone translate_zone(allocator_, ZONE_NAME);
Utf16CharacterStream* stream = parse_info()->character_stream(); ScannerStream* stream = parse_info()->character_stream();
base::Optional<AllowHandleDereference> allow_deref; base::Optional<AllowHandleDereference> allow_deref;
if (stream->can_access_heap()) { if (stream->can_access_heap()) {
allow_deref.emplace(); allow_deref.emplace();
} }
stream->Seek(compilation_info()->literal()->start_position()); wasm::AsmJsParser parser(&translate_zone, stack_limit(), stream,
wasm::AsmJsParser parser(&translate_zone, stack_limit(), stream); compilation_info()->literal()->start_position());
if (!parser.Run()) { if (!parser.Run()) {
if (!FLAG_suppress_asm_messages) { if (!FLAG_suppress_asm_messages) {
ReportCompilationFailure(parse_info(), parser.failure_location(), ReportCompilationFailure(parse_info(), parser.failure_location(),
......
...@@ -69,9 +69,9 @@ namespace wasm { ...@@ -69,9 +69,9 @@ namespace wasm {
#define TOK(name) AsmJsScanner::kToken_##name #define TOK(name) AsmJsScanner::kToken_##name
AsmJsParser::AsmJsParser(Zone* zone, uintptr_t stack_limit, AsmJsParser::AsmJsParser(Zone* zone, uintptr_t stack_limit,
Utf16CharacterStream* stream) ScannerStream* stream, int start)
: zone_(zone), : zone_(zone),
scanner_(stream), scanner_(static_cast<CharacterStream<uint16_t>*>(stream), start),
module_builder_(new (zone) WasmModuleBuilder(zone)), module_builder_(new (zone) WasmModuleBuilder(zone)),
return_type_(nullptr), return_type_(nullptr),
stack_limit_(stack_limit), stack_limit_(stack_limit),
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
namespace v8 { namespace v8 {
namespace internal { namespace internal {
class Utf16CharacterStream; class ScannerStream;
namespace wasm { namespace wasm {
...@@ -49,8 +49,8 @@ class AsmJsParser { ...@@ -49,8 +49,8 @@ class AsmJsParser {
typedef EnumSet<StandardMember, uint64_t> StdlibSet; typedef EnumSet<StandardMember, uint64_t> StdlibSet;
explicit AsmJsParser(Zone* zone, uintptr_t stack_limit, explicit AsmJsParser(Zone* zone, uintptr_t stack_limit, ScannerStream* stream,
Utf16CharacterStream* stream); int start);
bool Run(); bool Run();
const char* failure_message() const { return failure_message_; } const char* failure_message() const { return failure_message_; }
int failure_location() const { return failure_location_; } int failure_location() const { return failure_location_; }
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "src/char-predicates-inl.h" #include "src/char-predicates-inl.h"
#include "src/conversions.h" #include "src/conversions.h"
#include "src/flags.h" #include "src/flags.h"
#include "src/parsing/scanner-character-streams.h"
#include "src/parsing/scanner.h" #include "src/parsing/scanner.h"
#include "src/unicode-cache.h" #include "src/unicode-cache.h"
...@@ -19,7 +20,7 @@ namespace { ...@@ -19,7 +20,7 @@ namespace {
static const int kMaxIdentifierCount = 0xF000000; static const int kMaxIdentifierCount = 0xF000000;
}; };
AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream) AsmJsScanner::AsmJsScanner(CharacterStream<uint16_t>* stream, int start)
: stream_(stream), : stream_(stream),
token_(kUninitialized), token_(kUninitialized),
preceding_token_(kUninitialized), preceding_token_(kUninitialized),
...@@ -33,6 +34,7 @@ AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream) ...@@ -33,6 +34,7 @@ AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream)
double_value_(0.0), double_value_(0.0),
unsigned_value_(0), unsigned_value_(0),
preceded_by_newline_(false) { preceded_by_newline_(false) {
stream->Seek(start);
#define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name; #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
STDLIB_MATH_FUNCTION_LIST(V) STDLIB_MATH_FUNCTION_LIST(V)
STDLIB_ARRAY_TYPE_LIST(V) STDLIB_ARRAY_TYPE_LIST(V)
......
...@@ -16,7 +16,8 @@ ...@@ -16,7 +16,8 @@
namespace v8 { namespace v8 {
namespace internal { namespace internal {
class Utf16CharacterStream; template <typename Char>
class CharacterStream;
// A custom scanner to extract the token stream needed to parse valid // A custom scanner to extract the token stream needed to parse valid
// asm.js: http://asmjs.org/spec/latest/ // asm.js: http://asmjs.org/spec/latest/
...@@ -31,7 +32,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner { ...@@ -31,7 +32,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
public: public:
typedef int32_t token_t; typedef int32_t token_t;
explicit AsmJsScanner(Utf16CharacterStream* stream); AsmJsScanner(CharacterStream<uint16_t>* stream, int start);
// Get current token. // Get current token.
token_t Token() const { return token_; } token_t Token() const { return token_; }
...@@ -136,7 +137,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner { ...@@ -136,7 +137,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
// clang-format on // clang-format on
private: private:
Utf16CharacterStream* stream_; CharacterStream<uint16_t>* stream_;
token_t token_; token_t token_;
token_t preceding_token_; token_t preceding_token_;
token_t next_token_; // Only set when in {rewind} state. token_t next_token_; // Only set when in {rewind} state.
......
...@@ -133,7 +133,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) { ...@@ -133,7 +133,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) {
DCHECK(script->type() != Script::TYPE_NATIVE); DCHECK(script->type() != Script::TYPE_NATIVE);
Handle<String> source(String::cast(script->source()), isolate); Handle<String> source(String::cast(script->source()), isolate);
if (source->IsExternalTwoByteString() || source->IsExternalOneByteString()) { if (source->IsExternalTwoByteString() || source->IsExternalOneByteString()) {
std::unique_ptr<Utf16CharacterStream> stream(ScannerStream::For( std::unique_ptr<ScannerStream> stream(ScannerStream::For(
isolate, source, shared_->StartPosition(), shared_->EndPosition())); isolate, source, shared_->StartPosition(), shared_->EndPosition()));
parse_info_->set_character_stream(std::move(stream)); parse_info_->set_character_stream(std::move(stream));
} else { } else {
...@@ -191,7 +191,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) { ...@@ -191,7 +191,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) {
.ToHandleChecked(); .ToHandleChecked();
} }
wrapper_ = isolate->global_handles()->Create(*wrapper); wrapper_ = isolate->global_handles()->Create(*wrapper);
std::unique_ptr<Utf16CharacterStream> stream( std::unique_ptr<ScannerStream> stream(
ScannerStream::For(isolate, wrapper_, shared_->StartPosition() - offset, ScannerStream::For(isolate, wrapper_, shared_->StartPosition() - offset,
shared_->EndPosition() - offset)); shared_->EndPosition() - offset));
parse_info_->set_character_stream(std::move(stream)); parse_info_->set_character_stream(std::move(stream));
......
...@@ -966,7 +966,7 @@ BackgroundCompileTask::BackgroundCompileTask(ScriptStreamingData* source, ...@@ -966,7 +966,7 @@ BackgroundCompileTask::BackgroundCompileTask(ScriptStreamingData* source,
info->set_runtime_call_stats(nullptr); info->set_runtime_call_stats(nullptr);
} }
info->set_toplevel(); info->set_toplevel();
std::unique_ptr<Utf16CharacterStream> stream( std::unique_ptr<ScannerStream> stream(
ScannerStream::For(source->source_stream.get(), source->encoding, ScannerStream::For(source->source_stream.get(), source->encoding,
info->runtime_call_stats())); info->runtime_call_stats()));
info->set_character_stream(std::move(stream)); info->set_character_stream(std::move(stream));
......
...@@ -198,7 +198,7 @@ void ParseInfo::AllocateSourceRangeMap() { ...@@ -198,7 +198,7 @@ void ParseInfo::AllocateSourceRangeMap() {
void ParseInfo::ResetCharacterStream() { character_stream_.reset(); } void ParseInfo::ResetCharacterStream() { character_stream_.reset(); }
void ParseInfo::set_character_stream( void ParseInfo::set_character_stream(
std::unique_ptr<Utf16CharacterStream> character_stream) { std::unique_ptr<ScannerStream> character_stream) {
DCHECK_NULL(character_stream_); DCHECK_NULL(character_stream_);
character_stream_.swap(character_stream); character_stream_.swap(character_stream);
} }
......
...@@ -31,7 +31,7 @@ class RuntimeCallStats; ...@@ -31,7 +31,7 @@ class RuntimeCallStats;
class Logger; class Logger;
class SourceRangeMap; class SourceRangeMap;
class UnicodeCache; class UnicodeCache;
class Utf16CharacterStream; class ScannerStream;
class Zone; class Zone;
// A container for the inputs, configuration options, and outputs of parsing. // A container for the inputs, configuration options, and outputs of parsing.
...@@ -97,11 +97,8 @@ class V8_EXPORT_PRIVATE ParseInfo { ...@@ -97,11 +97,8 @@ class V8_EXPORT_PRIVATE ParseInfo {
: NO_PARSE_RESTRICTION; : NO_PARSE_RESTRICTION;
} }
Utf16CharacterStream* character_stream() const { ScannerStream* character_stream() const { return character_stream_.get(); }
return character_stream_.get(); void set_character_stream(std::unique_ptr<ScannerStream> character_stream);
}
void set_character_stream(
std::unique_ptr<Utf16CharacterStream> character_stream);
void ResetCharacterStream(); void ResetCharacterStream();
v8::Extension* extension() const { return extension_; } v8::Extension* extension() const { return extension_; }
...@@ -274,7 +271,7 @@ class V8_EXPORT_PRIVATE ParseInfo { ...@@ -274,7 +271,7 @@ class V8_EXPORT_PRIVATE ParseInfo {
MaybeHandle<ScopeInfo> maybe_outer_scope_info_; MaybeHandle<ScopeInfo> maybe_outer_scope_info_;
//----------- Inputs+Outputs of parsing and scope analysis ----------------- //----------- Inputs+Outputs of parsing and scope analysis -----------------
std::unique_ptr<Utf16CharacterStream> character_stream_; std::unique_ptr<ScannerStream> character_stream_;
ConsumedPreParsedScopeData consumed_preparsed_scope_data_; ConsumedPreParsedScopeData consumed_preparsed_scope_data_;
std::shared_ptr<AstValueFactory> ast_value_factory_; std::shared_ptr<AstValueFactory> ast_value_factory_;
const class AstStringConstants* ast_string_constants_; const class AstStringConstants* ast_string_constants_;
......
...@@ -507,7 +507,9 @@ FunctionLiteral* Parser::ParseProgram(Isolate* isolate, ParseInfo* info) { ...@@ -507,7 +507,9 @@ FunctionLiteral* Parser::ParseProgram(Isolate* isolate, ParseInfo* info) {
// Initialize parser state. // Initialize parser state.
DeserializeScopeChain(isolate, info, info->maybe_outer_scope_info()); DeserializeScopeChain(isolate, info, info->maybe_outer_scope_info());
scanner_.Initialize(info->character_stream(), info->is_module()); auto stream =
static_cast<CharacterStream<uint16_t>*>(info->character_stream());
scanner_.Initialize(stream, info->is_module());
FunctionLiteral* result = DoParseProgram(isolate, info); FunctionLiteral* result = DoParseProgram(isolate, info);
MaybeResetCharacterStream(info, result); MaybeResetCharacterStream(info, result);
...@@ -701,7 +703,9 @@ FunctionLiteral* Parser::ParseFunction(Isolate* isolate, ParseInfo* info, ...@@ -701,7 +703,9 @@ FunctionLiteral* Parser::ParseFunction(Isolate* isolate, ParseInfo* info,
// Initialize parser state. // Initialize parser state.
Handle<String> name(shared_info->Name(), isolate); Handle<String> name(shared_info->Name(), isolate);
info->set_function_name(ast_value_factory()->GetString(name)); info->set_function_name(ast_value_factory()->GetString(name));
scanner_.Initialize(info->character_stream(), info->is_module()); auto stream =
static_cast<CharacterStream<uint16_t>*>(info->character_stream());
scanner_.Initialize(stream, info->is_module());
FunctionLiteral* result = FunctionLiteral* result =
DoParseFunction(isolate, info, info->function_name()); DoParseFunction(isolate, info, info->function_name());
...@@ -3435,7 +3439,9 @@ void Parser::ParseOnBackground(ParseInfo* info) { ...@@ -3435,7 +3439,9 @@ void Parser::ParseOnBackground(ParseInfo* info) {
DCHECK_NULL(info->literal()); DCHECK_NULL(info->literal());
FunctionLiteral* result = nullptr; FunctionLiteral* result = nullptr;
scanner_.Initialize(info->character_stream(), info->is_module()); auto stream =
static_cast<CharacterStream<uint16_t>*>(info->character_stream());
scanner_.Initialize(stream, info->is_module());
DCHECK(info->maybe_outer_scope_info().is_null()); DCHECK(info->maybe_outer_scope_info().is_null());
DCHECK(original_scope_); DCHECK(original_scope_);
......
...@@ -26,8 +26,7 @@ bool ParseProgram(ParseInfo* info, Isolate* isolate) { ...@@ -26,8 +26,7 @@ bool ParseProgram(ParseInfo* info, Isolate* isolate) {
// Create a character stream for the parser. // Create a character stream for the parser.
Handle<String> source(String::cast(info->script()->source()), isolate); Handle<String> source(String::cast(info->script()->source()), isolate);
isolate->counters()->total_parse_size()->Increment(source->length()); isolate->counters()->total_parse_size()->Increment(source->length());
std::unique_ptr<Utf16CharacterStream> stream( std::unique_ptr<ScannerStream> stream(ScannerStream::For(isolate, source));
ScannerStream::For(isolate, source));
info->set_character_stream(std::move(stream)); info->set_character_stream(std::move(stream));
Parser parser(info); Parser parser(info);
...@@ -61,7 +60,7 @@ bool ParseFunction(ParseInfo* info, Handle<SharedFunctionInfo> shared_info, ...@@ -61,7 +60,7 @@ bool ParseFunction(ParseInfo* info, Handle<SharedFunctionInfo> shared_info,
// Create a character stream for the parser. // Create a character stream for the parser.
Handle<String> source(String::cast(info->script()->source()), isolate); Handle<String> source(String::cast(info->script()->source()), isolate);
isolate->counters()->total_parse_size()->Increment(source->length()); isolate->counters()->total_parse_size()->Increment(source->length());
std::unique_ptr<Utf16CharacterStream> stream( std::unique_ptr<ScannerStream> stream(
ScannerStream::For(isolate, source, shared_info->StartPosition(), ScannerStream::For(isolate, source, shared_info->StartPosition(),
shared_info->EndPosition())); shared_info->EndPosition()));
info->set_character_stream(std::move(stream)); info->set_character_stream(std::move(stream));
......
...@@ -157,7 +157,7 @@ class ChunkedStream { ...@@ -157,7 +157,7 @@ class ChunkedStream {
// Chars are buffered if either the underlying stream isn't utf-16 or the // Chars are buffered if either the underlying stream isn't utf-16 or the
// underlying utf-16 stream might move (is on-heap). // underlying utf-16 stream might move (is on-heap).
template <typename Char, template <typename T> class ByteStream> template <typename Char, template <typename T> class ByteStream>
class BufferedCharacterStream : public Utf16CharacterStream { class BufferedCharacterStream : public CharacterStream<uint16_t> {
public: public:
template <class... TArgs> template <class... TArgs>
BufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) { BufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
...@@ -194,7 +194,7 @@ class BufferedCharacterStream : public Utf16CharacterStream { ...@@ -194,7 +194,7 @@ class BufferedCharacterStream : public Utf16CharacterStream {
// Provides a unbuffered utf-16 view on the bytes from the underlying // Provides a unbuffered utf-16 view on the bytes from the underlying
// ByteStream. // ByteStream.
template <template <typename T> class ByteStream> template <template <typename T> class ByteStream>
class UnbufferedCharacterStream : public Utf16CharacterStream { class UnbufferedCharacterStream : public CharacterStream<uint16_t> {
public: public:
template <class... TArgs> template <class... TArgs>
UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) { UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
...@@ -268,7 +268,7 @@ class RelocatingCharacterStream ...@@ -268,7 +268,7 @@ class RelocatingCharacterStream
// even positions before the current). // even positions before the current).
// //
// TODO(verwaest): Remove together with Utf8 external streaming streams. // TODO(verwaest): Remove together with Utf8 external streaming streams.
class BufferedUtf16CharacterStream : public Utf16CharacterStream { class BufferedUtf16CharacterStream : public CharacterStream<uint16_t> {
public: public:
BufferedUtf16CharacterStream(); BufferedUtf16CharacterStream();
...@@ -287,7 +287,7 @@ class BufferedUtf16CharacterStream : public Utf16CharacterStream { ...@@ -287,7 +287,7 @@ class BufferedUtf16CharacterStream : public Utf16CharacterStream {
}; };
BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
: Utf16CharacterStream(buffer_, buffer_, buffer_, 0) {} : CharacterStream(buffer_, buffer_, buffer_, 0) {}
bool BufferedUtf16CharacterStream::ReadBlock() { bool BufferedUtf16CharacterStream::ReadBlock() {
DCHECK_EQ(buffer_start_, buffer_); DCHECK_EQ(buffer_start_, buffer_);
...@@ -585,12 +585,11 @@ size_t Utf8ExternalStreamingStream::FillBuffer(size_t position) { ...@@ -585,12 +585,11 @@ size_t Utf8ExternalStreamingStream::FillBuffer(size_t position) {
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// ScannerStream: Create stream instances. // ScannerStream: Create stream instances.
Utf16CharacterStream* ScannerStream::For(Isolate* isolate, ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data) {
Handle<String> data) {
return ScannerStream::For(isolate, data, 0, data->length()); return ScannerStream::For(isolate, data, 0, data->length());
} }
Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data, ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
int start_pos, int end_pos) { int start_pos, int end_pos) {
DCHECK_GE(start_pos, 0); DCHECK_GE(start_pos, 0);
DCHECK_LE(start_pos, end_pos); DCHECK_LE(start_pos, end_pos);
...@@ -629,20 +628,20 @@ Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data, ...@@ -629,20 +628,20 @@ Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
} }
} }
std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting( std::unique_ptr<CharacterStream<uint16_t>> ScannerStream::ForTesting(
const char* data) { const char* data) {
return ScannerStream::ForTesting(data, strlen(data)); return ScannerStream::ForTesting(data, strlen(data));
} }
std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting( std::unique_ptr<CharacterStream<uint16_t>> ScannerStream::ForTesting(
const char* data, size_t length) { const char* data, size_t length) {
return std::unique_ptr<Utf16CharacterStream>( return std::unique_ptr<CharacterStream<uint16_t>>(
new BufferedCharacterStream<uint8_t, ExternalStringStream>( new BufferedCharacterStream<uint8_t, ExternalStringStream>(
static_cast<size_t>(0), reinterpret_cast<const uint8_t*>(data), static_cast<size_t>(0), reinterpret_cast<const uint8_t*>(data),
static_cast<size_t>(length))); static_cast<size_t>(length)));
} }
Utf16CharacterStream* ScannerStream::For( ScannerStream* ScannerStream::For(
ScriptCompiler::ExternalSourceStream* source_stream, ScriptCompiler::ExternalSourceStream* source_stream,
v8::ScriptCompiler::StreamedSource::Encoding encoding, v8::ScriptCompiler::StreamedSource::Encoding encoding,
RuntimeCallStats* stats) { RuntimeCallStats* stats) {
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#ifndef V8_PARSING_SCANNER_CHARACTER_STREAMS_H_ #ifndef V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
#define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_ #define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
#include <algorithm>
#include "include/v8.h" // for v8::ScriptCompiler #include "include/v8.h" // for v8::ScriptCompiler
#include "src/globals.h" #include "src/globals.h"
...@@ -13,24 +15,182 @@ namespace internal { ...@@ -13,24 +15,182 @@ namespace internal {
template <typename T> template <typename T>
class Handle; class Handle;
class Utf16CharacterStream; template <typename Char>
class CharacterStream;
class RuntimeCallStats; class RuntimeCallStats;
class String; class String;
class V8_EXPORT_PRIVATE ScannerStream { class V8_EXPORT_PRIVATE ScannerStream {
public: public:
static Utf16CharacterStream* For(Isolate* isolate, Handle<String> data); static const uc32 kEndOfInput = -1;
static Utf16CharacterStream* For(Isolate* isolate, Handle<String> data,
static ScannerStream* For(Isolate* isolate, Handle<String> data);
static ScannerStream* For(Isolate* isolate, Handle<String> data,
int start_pos, int end_pos); int start_pos, int end_pos);
static Utf16CharacterStream* For( static ScannerStream* For(ScriptCompiler::ExternalSourceStream* source_stream,
ScriptCompiler::ExternalSourceStream* source_stream,
ScriptCompiler::StreamedSource::Encoding encoding, ScriptCompiler::StreamedSource::Encoding encoding,
RuntimeCallStats* stats); RuntimeCallStats* stats);
// For testing: // For testing:
static std::unique_ptr<Utf16CharacterStream> ForTesting(const char* data); static std::unique_ptr<CharacterStream<uint16_t>> ForTesting(
static std::unique_ptr<Utf16CharacterStream> ForTesting(const char* data, const char* data);
static std::unique_ptr<CharacterStream<uint16_t>> ForTesting(const char* data,
size_t length); size_t length);
// Returns true if the stream could access the V8 heap after construction.
virtual bool can_access_heap() = 0;
virtual uc32 Advance() = 0;
virtual void Seek(size_t pos) = 0;
virtual size_t pos() const = 0;
virtual void Back() = 0;
virtual ~ScannerStream() {}
};
// Buffered stream of code units of type Char that implements the
// ScannerStream interface. The stream exposes a window
// [buffer_start_, buffer_end_) of the input plus a cursor into it; subclasses
// provide the data by implementing ReadBlock().
template <typename Char>
class CharacterStream : public ScannerStream {
 public:
  // Returns and advances past the next code unit in the input stream. If
  // there are no more code units it returns kEndOfInput.
  inline uc32 Advance() final {
    if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
      return static_cast<uc32>(*(buffer_cursor_++));
    } else if (ReadBlockChecked()) {
      return static_cast<uc32>(*(buffer_cursor_++));
    } else {
      // Note: currently the following increment is necessary to avoid a
      // parser problem! The scanner treats the final kEndOfInput as
      // a code unit with a position, and does math relative to that
      // position.
      buffer_cursor_++;
      return kEndOfInput;
    }
  }

  // Returns and advances past the next code unit in the input stream that
  // satisfies |check|. If there are no more code units it returns
  // kEndOfInput.
  //
  // |check| is invoked with each code unit widened to uc32 and must return
  // something convertible to bool.
  template <typename FunctionType>
  V8_INLINE uc32 AdvanceUntil(FunctionType check) {
    while (true) {
      auto next_cursor_pos =
          std::find_if(buffer_cursor_, buffer_end_, [&check](Char raw_c0) {
            uc32 c0 = static_cast<uc32>(raw_c0);
            return check(c0);
          });

      if (next_cursor_pos == buffer_end_) {
        // Nothing matched in the current window: consume it entirely and try
        // to fetch the next block.
        buffer_cursor_ = buffer_end_;
        if (!ReadBlockChecked()) {
          // Same end-of-input position adjustment as in Advance().
          buffer_cursor_++;
          return kEndOfInput;
        }
      } else {
        // Step past the matching code unit and return it.
        buffer_cursor_ = next_cursor_pos + 1;
        return static_cast<uc32>(*next_cursor_pos);
      }
    }
  }

  // Go back by one character in the input stream.
  // This undoes the most recent Advance().
  inline void Back() final {
    // The common case - if the previous character is within
    // buffer_start_ .. buffer_end_ - will be handled locally.
    // Otherwise, a new block is requested.
    if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
      buffer_cursor_--;
    } else {
      ReadBlockAt(pos() - 1);
    }
  }

  // Go back by two characters in the input stream. (This is the same as
  // calling Back() twice. But Back() may - in some instances - do substantial
  // work. Back2() guarantees this work will be done only once.)
  inline void Back2() {
    // Compare the distance from the buffer start rather than computing
    // buffer_cursor_ - 2 first: the latter would form a pointer before the
    // start of the buffer whenever fewer than two code units precede the
    // cursor, which is undefined behaviour.
    if (V8_LIKELY(buffer_cursor_ - buffer_start_ >= 2)) {
      buffer_cursor_ -= 2;
    } else {
      ReadBlockAt(pos() - 2);
    }
  }

  // Position of the cursor, in code units, relative to the start of the
  // whole input (buffer_pos_ is the position of buffer_start_).
  inline size_t pos() const final {
    return buffer_pos_ + (buffer_cursor_ - buffer_start_);
  }

  // Moves the cursor to |pos|. Positions inside the current window are
  // handled by adjusting the cursor; anything else requests a new block.
  inline void Seek(size_t pos) final {
    if (V8_LIKELY(pos >= buffer_pos_ &&
                  pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
      buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
    } else {
      ReadBlockAt(pos);
    }
  }

  // Returns true if the stream could access the V8 heap after construction.
  virtual bool can_access_heap() = 0;

 protected:
  // The buffer pointers are typed on Char so that they match the members
  // below for every instantiation of the template.
  CharacterStream(const Char* buffer_start, const Char* buffer_cursor,
                  const Char* buffer_end, size_t buffer_pos)
      : buffer_start_(buffer_start),
        buffer_cursor_(buffer_cursor),
        buffer_end_(buffer_end),
        buffer_pos_(buffer_pos) {}
  CharacterStream() : CharacterStream(nullptr, nullptr, nullptr, 0) {}

  // Calls ReadBlock() and DCHECKs its post-conditions. Returns whether more
  // data is available.
  bool ReadBlockChecked() {
    size_t position = pos();
    USE(position);
    bool success = ReadBlock();

    // Post-conditions: 1, We should always be at the right position.
    //                  2, Cursor should be inside the buffer.
    //                  3, We should have more characters available iff success.
    DCHECK_EQ(pos(), position);
    DCHECK_LE(buffer_cursor_, buffer_end_);
    DCHECK_LE(buffer_start_, buffer_cursor_);
    DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
    return success;
  }

  // Repositions the stream at |new_pos| and reads the block containing it.
  void ReadBlockAt(size_t new_pos) {
    // The callers of this method (Back/Back2/Seek) should handle the easy
    // case (seeking within the current buffer), and we should only get here
    // if we actually require new data.
    // (This is really an efficiency check, not a correctness invariant.)
    DCHECK(new_pos < buffer_pos_ ||
           new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));

    // Change pos() to point to new_pos.
    buffer_pos_ = new_pos;
    buffer_cursor_ = buffer_start_;
    DCHECK_EQ(pos(), new_pos);
    ReadBlockChecked();
  }

  // Read more data, and update buffer_*_ to point to it.
  // Returns true if more data was available.
  //
  // ReadBlock() may modify any of the buffer_*_ members, but must ensure that
  // the result of pos() remains unaffected.
  //
  // Examples:
  // - a stream could either fill a separate buffer. Then buffer_start_ and
  //   buffer_cursor_ would point to the beginning of the buffer, and
  //   buffer_pos would be the old pos().
  // - a stream with existing buffer chunks would set buffer_start_ and
  //   buffer_end_ to cover the full chunk, and then buffer_cursor_ would
  //   point into the middle of the buffer, while buffer_pos_ would describe
  //   the start of the buffer.
  virtual bool ReadBlock() = 0;

  const Char* buffer_start_;
  const Char* buffer_cursor_;
  const Char* buffer_end_;
  size_t buffer_pos_;
};
} // namespace internal } // namespace internal
......
...@@ -188,7 +188,7 @@ Scanner::Scanner(UnicodeCache* unicode_cache) ...@@ -188,7 +188,7 @@ Scanner::Scanner(UnicodeCache* unicode_cache)
allow_harmony_bigint_(false), allow_harmony_bigint_(false),
allow_harmony_numeric_separator_(false) {} allow_harmony_numeric_separator_(false) {}
void Scanner::Initialize(Utf16CharacterStream* source, bool is_module) { void Scanner::Initialize(CharacterStream<uint16_t>* source, bool is_module) {
DCHECK_NOT_NULL(source); DCHECK_NOT_NULL(source);
source_ = source; source_ = source;
is_module_ = is_module; is_module_ = is_module;
......
...@@ -7,13 +7,12 @@ ...@@ -7,13 +7,12 @@
#ifndef V8_PARSING_SCANNER_H_ #ifndef V8_PARSING_SCANNER_H_
#define V8_PARSING_SCANNER_H_ #define V8_PARSING_SCANNER_H_
#include <algorithm>
#include "src/allocation.h" #include "src/allocation.h"
#include "src/base/logging.h" #include "src/base/logging.h"
#include "src/char-predicates.h" #include "src/char-predicates.h"
#include "src/globals.h" #include "src/globals.h"
#include "src/messages.h" #include "src/messages.h"
#include "src/parsing/scanner-character-streams.h"
#include "src/parsing/token.h" #include "src/parsing/token.h"
#include "src/unicode-decoder.h" #include "src/unicode-decoder.h"
#include "src/unicode.h" #include "src/unicode.h"
...@@ -30,161 +29,6 @@ class ExternalTwoByteString; ...@@ -30,161 +29,6 @@ class ExternalTwoByteString;
class ParserRecorder; class ParserRecorder;
class UnicodeCache; class UnicodeCache;
// ---------------------------------------------------------------------
// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
// A code unit is a 16 bit value representing either a 16 bit code point
// or one part of a surrogate pair that make a single 21 bit code point.
class Utf16CharacterStream {
public:
static const uc32 kEndOfInput = -1;
virtual ~Utf16CharacterStream() { }
// Returns and advances past the next UTF-16 code unit in the input
// stream. If there are no more code units it returns kEndOfInput.
inline uc32 Advance() {
if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
return static_cast<uc32>(*(buffer_cursor_++));
} else if (ReadBlockChecked()) {
return static_cast<uc32>(*(buffer_cursor_++));
} else {
// Note: currently the following increment is necessary to avoid a
// parser problem! The scanner treats the final kEndOfInput as
// a code unit with a position, and does math relative to that
// position.
buffer_cursor_++;
return kEndOfInput;
}
}
// Returns and advances past the next UTF-16 code unit in the input stream
// that meets the checks requirement. If there are no more code units it
// returns kEndOfInput.
template <typename FunctionType>
V8_INLINE uc32 AdvanceUntil(FunctionType check) {
while (true) {
auto next_cursor_pos =
std::find_if(buffer_cursor_, buffer_end_, [&check](uint16_t raw_c0_) {
uc32 c0_ = static_cast<uc32>(raw_c0_);
return check(c0_);
});
if (next_cursor_pos == buffer_end_) {
buffer_cursor_ = buffer_end_;
if (!ReadBlockChecked()) {
buffer_cursor_++;
return kEndOfInput;
}
} else {
buffer_cursor_ = next_cursor_pos + 1;
return static_cast<uc32>(*next_cursor_pos);
}
}
}
// Go back one by one character in the input stream.
// This undoes the most recent Advance().
inline void Back() {
// The common case - if the previous character is within
// buffer_start_ .. buffer_end_ will be handles locally.
// Otherwise, a new block is requested.
if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
buffer_cursor_--;
} else {
ReadBlockAt(pos() - 1);
}
}
// Go back one by two characters in the input stream. (This is the same as
// calling Back() twice. But Back() may - in some instances - do substantial
// work. Back2() guarantees this work will be done only once.)
inline void Back2() {
if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) {
buffer_cursor_ -= 2;
} else {
ReadBlockAt(pos() - 2);
}
}
inline size_t pos() const {
return buffer_pos_ + (buffer_cursor_ - buffer_start_);
}
inline void Seek(size_t pos) {
if (V8_LIKELY(pos >= buffer_pos_ &&
pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
} else {
ReadBlockAt(pos);
}
}
// Returns true if the stream could access the V8 heap after construction.
virtual bool can_access_heap() = 0;
protected:
// Creates a stream over the block [buffer_start, buffer_end) with the read
// cursor at |buffer_cursor|; |buffer_pos| is the stream position that
// corresponds to |buffer_start|.
Utf16CharacterStream(const uint16_t* buffer_start,
                     const uint16_t* buffer_cursor,
                     const uint16_t* buffer_end, size_t buffer_pos)
    : buffer_start_{buffer_start},
      buffer_cursor_{buffer_cursor},
      buffer_end_{buffer_end},
      buffer_pos_{buffer_pos} {}
// Creates a stream with no buffered block: all buffer pointers are null and
// the stream position is 0.
Utf16CharacterStream()
    : buffer_start_(nullptr),
      buffer_cursor_(nullptr),
      buffer_end_(nullptr),
      buffer_pos_(0) {}
// Calls ReadBlock() and verifies (via DCHECKs) that the implementation
// honoured its contract. Returns ReadBlock()'s result: true if more
// characters are available at the cursor.
bool ReadBlockChecked() {
// Remember where we were so the position invariant can be checked below.
size_t position = pos();
// |position| is otherwise only read by the DCHECK_EQ below.
// NOTE(review): presumably USE() silences an unused-variable warning when
// DCHECKs are compiled out - confirm.
USE(position);
bool success = ReadBlock();
// Post-conditions: 1, We should always be at the right position.
// 2, Cursor should be inside the buffer.
// 3, We should have more characters available iff success.
DCHECK_EQ(pos(), position);
DCHECK_LE(buffer_cursor_, buffer_end_);
DCHECK_LE(buffer_start_, buffer_cursor_);
DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
return success;
}
// Repositions the stream to |new_pos| when that position lies outside the
// currently buffered block, then requests the data for it. The result of
// the read is not reported to the caller.
void ReadBlockAt(size_t new_pos) {
// The callers of this method (Back/Back2/Seek) should handle the easy
// case (seeking within the current buffer), and we should only get here
// if we actually require new data.
// (This is really an efficiency check, not a correctness invariant.)
DCHECK(new_pos < buffer_pos_ ||
new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
// Change pos() to point to new_pos.
// With the cursor reset to buffer_start_, pos() evaluates to buffer_pos_,
// i.e. to new_pos.
buffer_pos_ = new_pos;
buffer_cursor_ = buffer_start_;
DCHECK_EQ(pos(), new_pos);
// Let the stream implementation provide the block for the new position.
ReadBlockChecked();
}
// Read more data, and update buffer_*_ to point to it.
// Returns true if more data was available.
//
// ReadBlock() may modify any of the buffer_*_ members, but must ensure that
// the result of pos() remains unaffected.
//
// Examples:
// - a stream could fill a separate buffer. Then buffer_start_ and
// buffer_cursor_ would point to the beginning of the buffer, and
// buffer_pos_ would be the old pos().
// - a stream with existing buffer chunks would set buffer_start_ and
// buffer_end_ to cover the full chunk, and then buffer_cursor_ would
// point into the middle of the buffer, while buffer_pos_ would describe
// the start of the buffer.
//
// Pure virtual. Within this class it is invoked through ReadBlockChecked(),
// which DCHECKs the post-conditions on the buffer_*_ members.
virtual bool ReadBlock() = 0;
// First code unit of the currently buffered block.
const uint16_t* buffer_start_;
// Read cursor; pos() is derived from its offset relative to buffer_start_.
const uint16_t* buffer_cursor_;
// End of the buffered block; scanning treats [buffer_cursor_, buffer_end_)
// as the remaining buffered code units.
const uint16_t* buffer_end_;
// Stream position that corresponds to buffer_start_.
size_t buffer_pos_;
};
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// JavaScript Scanner. // JavaScript Scanner.
...@@ -232,11 +76,11 @@ class Scanner { ...@@ -232,11 +76,11 @@ class Scanner {
// -1 is outside of the range of any real source code. // -1 is outside of the range of any real source code.
static const int kNoOctalLocation = -1; static const int kNoOctalLocation = -1;
static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput; static const uc32 kEndOfInput = ScannerStream::kEndOfInput;
explicit Scanner(UnicodeCache* scanner_contants); explicit Scanner(UnicodeCache* scanner_contants);
void Initialize(Utf16CharacterStream* source, bool is_module); void Initialize(CharacterStream<uint16_t>* source, bool is_module);
// Returns the next token and advances input. // Returns the next token and advances input.
Token::Value Next(); Token::Value Next();
...@@ -847,8 +691,8 @@ class Scanner { ...@@ -847,8 +691,8 @@ class Scanner {
TokenDesc next_; // desc for next token (one token look-ahead) TokenDesc next_; // desc for next token (one token look-ahead)
TokenDesc next_next_; // desc for the token after next (after PeakAhead()) TokenDesc next_next_; // desc for the token after next (after PeakAhead())
// Input stream. Must be initialized to an Utf16CharacterStream. // Input stream. Must be initialized to a CharacterStream.
Utf16CharacterStream* source_; CharacterStream<uint16_t>* source_;
// Last-seen positions of potentially problematic tokens. // Last-seen positions of potentially problematic tokens.
Location octal_pos_; Location octal_pos_;
......
This diff is collapsed.
...@@ -27,7 +27,7 @@ struct ScannerTestHelper { ...@@ -27,7 +27,7 @@ struct ScannerTestHelper {
scanner(std::move(other.scanner)) {} scanner(std::move(other.scanner)) {}
std::unique_ptr<UnicodeCache> unicode_cache; std::unique_ptr<UnicodeCache> unicode_cache;
std::unique_ptr<Utf16CharacterStream> stream; std::unique_ptr<CharacterStream<uint16_t>> stream;
std::unique_ptr<Scanner> scanner; std::unique_ptr<Scanner> scanner;
Scanner* operator->() const { return scanner.get(); } Scanner* operator->() const { return scanner.get(); }
......
...@@ -396,8 +396,7 @@ TEST(PreParseOverflow) { ...@@ -396,8 +396,7 @@ TEST(PreParseOverflow) {
CHECK_EQ(i::PreParser::kPreParseStackOverflow, result); CHECK_EQ(i::PreParser::kPreParseStackOverflow, result);
} }
void TestStreamScanner(i::CharacterStream<uint16_t>* stream,
void TestStreamScanner(i::Utf16CharacterStream* stream,
i::Token::Value* expected_tokens, i::Token::Value* expected_tokens,
int skip_pos = 0, // Zero means not skipping. int skip_pos = 0, // Zero means not skipping.
int skip_to = 0) { int skip_to = 0) {
...@@ -420,8 +419,7 @@ void TestStreamScanner(i::Utf16CharacterStream* stream, ...@@ -420,8 +419,7 @@ void TestStreamScanner(i::Utf16CharacterStream* stream,
TEST(StreamScanner) { TEST(StreamScanner) {
v8::V8::Initialize(); v8::V8::Initialize();
const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib"; const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
std::unique_ptr<i::Utf16CharacterStream> stream1( auto stream1(i::ScannerStream::ForTesting(str1));
i::ScannerStream::ForTesting(str1));
i::Token::Value expectations1[] = { i::Token::Value expectations1[] = {
i::Token::LBRACE, i::Token::LBRACE,
i::Token::IDENTIFIER, i::Token::IDENTIFIER,
...@@ -439,8 +437,7 @@ TEST(StreamScanner) { ...@@ -439,8 +437,7 @@ TEST(StreamScanner) {
TestStreamScanner(stream1.get(), expectations1, 0, 0); TestStreamScanner(stream1.get(), expectations1, 0, 0);
const char* str2 = "case default const {THIS\nPART\nSKIPPED} do"; const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
std::unique_ptr<i::Utf16CharacterStream> stream2( auto stream2(i::ScannerStream::ForTesting(str2));
i::ScannerStream::ForTesting(str2));
i::Token::Value expectations2[] = { i::Token::Value expectations2[] = {
i::Token::CASE, i::Token::CASE,
i::Token::DEFAULT, i::Token::DEFAULT,
...@@ -470,8 +467,7 @@ TEST(StreamScanner) { ...@@ -470,8 +467,7 @@ TEST(StreamScanner) {
for (int i = 0; i <= 4; i++) { for (int i = 0; i <= 4; i++) {
expectations3[6 - i] = i::Token::ILLEGAL; expectations3[6 - i] = i::Token::ILLEGAL;
expectations3[5 - i] = i::Token::EOS; expectations3[5 - i] = i::Token::EOS;
std::unique_ptr<i::Utf16CharacterStream> stream3( auto stream3(i::ScannerStream::ForTesting(str3));
i::ScannerStream::ForTesting(str3));
TestStreamScanner(stream3.get(), expectations3, 1, 1 + i); TestStreamScanner(stream3.get(), expectations3, 1, 1 + i);
} }
} }
...@@ -1159,6 +1155,7 @@ void SetParserFlags(i::PreParser* parser, i::EnumSet<ParserFlag> flags) { ...@@ -1159,6 +1155,7 @@ void SetParserFlags(i::PreParser* parser, i::EnumSet<ParserFlag> flags) {
flags.Contains(kAllowHarmonyNumericSeparator)); flags.Contains(kAllowHarmonyNumericSeparator));
} }
template <typename Char>
void TestParserSyncWithFlags(i::Handle<i::String> source, void TestParserSyncWithFlags(i::Handle<i::String> source,
i::EnumSet<ParserFlag> flags, i::EnumSet<ParserFlag> flags,
ParserSyncTestResult result, ParserSyncTestResult result,
...@@ -1173,7 +1170,7 @@ void TestParserSyncWithFlags(i::Handle<i::String> source, ...@@ -1173,7 +1170,7 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
i::PendingCompilationErrorHandler pending_error_handler; i::PendingCompilationErrorHandler pending_error_handler;
if (test_preparser) { if (test_preparser) {
i::Scanner scanner(isolate->unicode_cache()); i::Scanner scanner(isolate->unicode_cache());
std::unique_ptr<i::Utf16CharacterStream> stream( std::unique_ptr<i::ScannerStream> stream(
i::ScannerStream::For(isolate, source)); i::ScannerStream::For(isolate, source));
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME); i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory( i::AstValueFactory ast_value_factory(
...@@ -1184,7 +1181,8 @@ void TestParserSyncWithFlags(i::Handle<i::String> source, ...@@ -1184,7 +1181,8 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
isolate->counters()->runtime_call_stats(), isolate->counters()->runtime_call_stats(),
isolate->logger(), -1, is_module); isolate->logger(), -1, is_module);
SetParserFlags(&preparser, flags); SetParserFlags(&preparser, flags);
scanner.Initialize(stream.get(), is_module); scanner.Initialize(static_cast<CharacterStream<Char>*>(stream.get()),
is_module);
i::PreParser::PreParseResult result = preparser.PreParseProgram(); i::PreParser::PreParseResult result = preparser.PreParseProgram();
CHECK_EQ(i::PreParser::kPreParseSuccess, result); CHECK_EQ(i::PreParser::kPreParseSuccess, result);
} }
...@@ -1294,8 +1292,15 @@ void TestParserSync(const char* source, const ParserFlag* varying_flags, ...@@ -1294,8 +1292,15 @@ void TestParserSync(const char* source, const ParserFlag* varying_flags,
++flag_index) { ++flag_index) {
flags.Remove(always_false_flags[flag_index]); flags.Remove(always_false_flags[flag_index]);
} }
TestParserSyncWithFlags(str, flags, result, is_module, test_preparser, if (str->IsSeqOneByteString()) {
ignore_error_msg); // TODO(verwaest): Switch to uint8_t.
TestParserSyncWithFlags<uint16_t>(str, flags, result, is_module,
test_preparser, ignore_error_msg);
} else {
DCHECK(str->IsSeqTwoByteString());
TestParserSyncWithFlags<uint16_t>(str, flags, result, is_module,
test_preparser, ignore_error_msg);
}
} }
} }
......
...@@ -17,7 +17,8 @@ class AsmJsScannerTest : public ::testing::Test { ...@@ -17,7 +17,8 @@ class AsmJsScannerTest : public ::testing::Test {
protected: protected:
void SetupScanner(const char* source) { void SetupScanner(const char* source) {
stream = ScannerStream::ForTesting(source); stream = ScannerStream::ForTesting(source);
scanner.reset(new AsmJsScanner(stream.get())); scanner.reset(new AsmJsScanner(
static_cast<CharacterStream<uint16_t>*>(stream.get()), 0));
} }
void Skip(AsmJsScanner::token_t t) { void Skip(AsmJsScanner::token_t t) {
...@@ -41,7 +42,7 @@ class AsmJsScannerTest : public ::testing::Test { ...@@ -41,7 +42,7 @@ class AsmJsScannerTest : public ::testing::Test {
CHECK_EQ(scanner->Token(), AsmJsScanner::kParseError); CHECK_EQ(scanner->Token(), AsmJsScanner::kParseError);
} }
std::unique_ptr<Utf16CharacterStream> stream; std::unique_ptr<ScannerStream> stream;
std::unique_ptr<AsmJsScanner> scanner; std::unique_ptr<AsmJsScanner> scanner;
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment