Commit 2d40e2f4 authored by Toon Verwaest, committed by Commit Bot

[scanner] Prepare CharacterStreams for specializing scanner and parser by character type

This templatizes CharacterStream by char type, and makes them subclass ScannerStream.
Methods that are widely used by tests are marked virtual on ScannerStream and final on
CharacterStream<T> so the specialized scanner will know what to call. ParseInfo passes
around ScannerStream, but the scanner requires the explicit CharacterStream<T>. Since
AdvanceUntil is templatized by FunctionType, I couldn't mark that virtual; so instead
I adjusted those tests to operate directly on ucs2 (not utf8 since we'll drop that in
the future).

In the end no functionality was changed. Some calls became virtual in tests. This is
mainly just preparation.

Change-Id: I0b4def65d3eb8fa5c806027c7e9123a590ebbdb5
Reviewed-on: https://chromium-review.googlesource.com/1156690
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54848}
parent 65e3cea3
......@@ -234,13 +234,13 @@ UnoptimizedCompilationJob::Status AsmJsCompilationJob::ExecuteJobImpl() {
Zone* compile_zone = compilation_info()->zone();
Zone translate_zone(allocator_, ZONE_NAME);
Utf16CharacterStream* stream = parse_info()->character_stream();
ScannerStream* stream = parse_info()->character_stream();
base::Optional<AllowHandleDereference> allow_deref;
if (stream->can_access_heap()) {
allow_deref.emplace();
}
stream->Seek(compilation_info()->literal()->start_position());
wasm::AsmJsParser parser(&translate_zone, stack_limit(), stream);
wasm::AsmJsParser parser(&translate_zone, stack_limit(), stream,
compilation_info()->literal()->start_position());
if (!parser.Run()) {
if (!FLAG_suppress_asm_messages) {
ReportCompilationFailure(parse_info(), parser.failure_location(),
......
......@@ -69,9 +69,9 @@ namespace wasm {
#define TOK(name) AsmJsScanner::kToken_##name
AsmJsParser::AsmJsParser(Zone* zone, uintptr_t stack_limit,
Utf16CharacterStream* stream)
ScannerStream* stream, int start)
: zone_(zone),
scanner_(stream),
scanner_(static_cast<CharacterStream<uint16_t>*>(stream), start),
module_builder_(new (zone) WasmModuleBuilder(zone)),
return_type_(nullptr),
stack_limit_(stack_limit),
......
......@@ -16,7 +16,7 @@
namespace v8 {
namespace internal {
class Utf16CharacterStream;
class ScannerStream;
namespace wasm {
......@@ -49,8 +49,8 @@ class AsmJsParser {
typedef EnumSet<StandardMember, uint64_t> StdlibSet;
explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
Utf16CharacterStream* stream);
explicit AsmJsParser(Zone* zone, uintptr_t stack_limit, ScannerStream* stream,
int start);
bool Run();
const char* failure_message() const { return failure_message_; }
int failure_location() const { return failure_location_; }
......
......@@ -7,6 +7,7 @@
#include "src/char-predicates-inl.h"
#include "src/conversions.h"
#include "src/flags.h"
#include "src/parsing/scanner-character-streams.h"
#include "src/parsing/scanner.h"
#include "src/unicode-cache.h"
......@@ -19,7 +20,7 @@ namespace {
static const int kMaxIdentifierCount = 0xF000000;
};
AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream)
AsmJsScanner::AsmJsScanner(CharacterStream<uint16_t>* stream, int start)
: stream_(stream),
token_(kUninitialized),
preceding_token_(kUninitialized),
......@@ -33,6 +34,7 @@ AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream)
double_value_(0.0),
unsigned_value_(0),
preceded_by_newline_(false) {
stream->Seek(start);
#define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
STDLIB_MATH_FUNCTION_LIST(V)
STDLIB_ARRAY_TYPE_LIST(V)
......
......@@ -16,7 +16,8 @@
namespace v8 {
namespace internal {
class Utf16CharacterStream;
template <typename Char>
class CharacterStream;
// A custom scanner to extract the token stream needed to parse valid
// asm.js: http://asmjs.org/spec/latest/
......@@ -31,7 +32,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
public:
typedef int32_t token_t;
explicit AsmJsScanner(Utf16CharacterStream* stream);
AsmJsScanner(CharacterStream<uint16_t>* stream, int start);
// Get current token.
token_t Token() const { return token_; }
......@@ -136,7 +137,7 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
// clang-format on
private:
Utf16CharacterStream* stream_;
CharacterStream<uint16_t>* stream_;
token_t token_;
token_t preceding_token_;
token_t next_token_; // Only set when in {rewind} state.
......
......@@ -133,7 +133,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) {
DCHECK(script->type() != Script::TYPE_NATIVE);
Handle<String> source(String::cast(script->source()), isolate);
if (source->IsExternalTwoByteString() || source->IsExternalOneByteString()) {
std::unique_ptr<Utf16CharacterStream> stream(ScannerStream::For(
std::unique_ptr<ScannerStream> stream(ScannerStream::For(
isolate, source, shared_->StartPosition(), shared_->EndPosition()));
parse_info_->set_character_stream(std::move(stream));
} else {
......@@ -191,7 +191,7 @@ void UnoptimizedCompileJob::PrepareOnMainThread(Isolate* isolate) {
.ToHandleChecked();
}
wrapper_ = isolate->global_handles()->Create(*wrapper);
std::unique_ptr<Utf16CharacterStream> stream(
std::unique_ptr<ScannerStream> stream(
ScannerStream::For(isolate, wrapper_, shared_->StartPosition() - offset,
shared_->EndPosition() - offset));
parse_info_->set_character_stream(std::move(stream));
......
......@@ -966,7 +966,7 @@ BackgroundCompileTask::BackgroundCompileTask(ScriptStreamingData* source,
info->set_runtime_call_stats(nullptr);
}
info->set_toplevel();
std::unique_ptr<Utf16CharacterStream> stream(
std::unique_ptr<ScannerStream> stream(
ScannerStream::For(source->source_stream.get(), source->encoding,
info->runtime_call_stats()));
info->set_character_stream(std::move(stream));
......
......@@ -198,7 +198,7 @@ void ParseInfo::AllocateSourceRangeMap() {
void ParseInfo::ResetCharacterStream() { character_stream_.reset(); }
void ParseInfo::set_character_stream(
std::unique_ptr<Utf16CharacterStream> character_stream) {
std::unique_ptr<ScannerStream> character_stream) {
DCHECK_NULL(character_stream_);
character_stream_.swap(character_stream);
}
......
......@@ -31,7 +31,7 @@ class RuntimeCallStats;
class Logger;
class SourceRangeMap;
class UnicodeCache;
class Utf16CharacterStream;
class ScannerStream;
class Zone;
// A container for the inputs, configuration options, and outputs of parsing.
......@@ -97,11 +97,8 @@ class V8_EXPORT_PRIVATE ParseInfo {
: NO_PARSE_RESTRICTION;
}
Utf16CharacterStream* character_stream() const {
return character_stream_.get();
}
void set_character_stream(
std::unique_ptr<Utf16CharacterStream> character_stream);
ScannerStream* character_stream() const { return character_stream_.get(); }
void set_character_stream(std::unique_ptr<ScannerStream> character_stream);
void ResetCharacterStream();
v8::Extension* extension() const { return extension_; }
......@@ -274,7 +271,7 @@ class V8_EXPORT_PRIVATE ParseInfo {
MaybeHandle<ScopeInfo> maybe_outer_scope_info_;
//----------- Inputs+Outputs of parsing and scope analysis -----------------
std::unique_ptr<Utf16CharacterStream> character_stream_;
std::unique_ptr<ScannerStream> character_stream_;
ConsumedPreParsedScopeData consumed_preparsed_scope_data_;
std::shared_ptr<AstValueFactory> ast_value_factory_;
const class AstStringConstants* ast_string_constants_;
......
......@@ -507,7 +507,9 @@ FunctionLiteral* Parser::ParseProgram(Isolate* isolate, ParseInfo* info) {
// Initialize parser state.
DeserializeScopeChain(isolate, info, info->maybe_outer_scope_info());
scanner_.Initialize(info->character_stream(), info->is_module());
auto stream =
static_cast<CharacterStream<uint16_t>*>(info->character_stream());
scanner_.Initialize(stream, info->is_module());
FunctionLiteral* result = DoParseProgram(isolate, info);
MaybeResetCharacterStream(info, result);
......@@ -701,7 +703,9 @@ FunctionLiteral* Parser::ParseFunction(Isolate* isolate, ParseInfo* info,
// Initialize parser state.
Handle<String> name(shared_info->Name(), isolate);
info->set_function_name(ast_value_factory()->GetString(name));
scanner_.Initialize(info->character_stream(), info->is_module());
auto stream =
static_cast<CharacterStream<uint16_t>*>(info->character_stream());
scanner_.Initialize(stream, info->is_module());
FunctionLiteral* result =
DoParseFunction(isolate, info, info->function_name());
......@@ -3435,7 +3439,9 @@ void Parser::ParseOnBackground(ParseInfo* info) {
DCHECK_NULL(info->literal());
FunctionLiteral* result = nullptr;
scanner_.Initialize(info->character_stream(), info->is_module());
auto stream =
static_cast<CharacterStream<uint16_t>*>(info->character_stream());
scanner_.Initialize(stream, info->is_module());
DCHECK(info->maybe_outer_scope_info().is_null());
DCHECK(original_scope_);
......
......@@ -26,8 +26,7 @@ bool ParseProgram(ParseInfo* info, Isolate* isolate) {
// Create a character stream for the parser.
Handle<String> source(String::cast(info->script()->source()), isolate);
isolate->counters()->total_parse_size()->Increment(source->length());
std::unique_ptr<Utf16CharacterStream> stream(
ScannerStream::For(isolate, source));
std::unique_ptr<ScannerStream> stream(ScannerStream::For(isolate, source));
info->set_character_stream(std::move(stream));
Parser parser(info);
......@@ -61,7 +60,7 @@ bool ParseFunction(ParseInfo* info, Handle<SharedFunctionInfo> shared_info,
// Create a character stream for the parser.
Handle<String> source(String::cast(info->script()->source()), isolate);
isolate->counters()->total_parse_size()->Increment(source->length());
std::unique_ptr<Utf16CharacterStream> stream(
std::unique_ptr<ScannerStream> stream(
ScannerStream::For(isolate, source, shared_info->StartPosition(),
shared_info->EndPosition()));
info->set_character_stream(std::move(stream));
......
......@@ -157,7 +157,7 @@ class ChunkedStream {
// Chars are buffered if either the underlying stream isn't utf-16 or the
// underlying utf-16 stream might move (is on-heap).
template <typename Char, template <typename T> class ByteStream>
class BufferedCharacterStream : public Utf16CharacterStream {
class BufferedCharacterStream : public CharacterStream<uint16_t> {
public:
template <class... TArgs>
BufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
......@@ -194,7 +194,7 @@ class BufferedCharacterStream : public Utf16CharacterStream {
// Provides a unbuffered utf-16 view on the bytes from the underlying
// ByteStream.
template <template <typename T> class ByteStream>
class UnbufferedCharacterStream : public Utf16CharacterStream {
class UnbufferedCharacterStream : public CharacterStream<uint16_t> {
public:
template <class... TArgs>
UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
......@@ -268,7 +268,7 @@ class RelocatingCharacterStream
// even positions before the current).
//
// TODO(verwaest): Remove together with Utf8 external streaming streams.
class BufferedUtf16CharacterStream : public Utf16CharacterStream {
class BufferedUtf16CharacterStream : public CharacterStream<uint16_t> {
public:
BufferedUtf16CharacterStream();
......@@ -287,7 +287,7 @@ class BufferedUtf16CharacterStream : public Utf16CharacterStream {
};
BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
: Utf16CharacterStream(buffer_, buffer_, buffer_, 0) {}
: CharacterStream(buffer_, buffer_, buffer_, 0) {}
bool BufferedUtf16CharacterStream::ReadBlock() {
DCHECK_EQ(buffer_start_, buffer_);
......@@ -585,13 +585,12 @@ size_t Utf8ExternalStreamingStream::FillBuffer(size_t position) {
// ----------------------------------------------------------------------------
// ScannerStream: Create stream instances.
Utf16CharacterStream* ScannerStream::For(Isolate* isolate,
Handle<String> data) {
ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data) {
return ScannerStream::For(isolate, data, 0, data->length());
}
Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
int start_pos, int end_pos) {
ScannerStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
int start_pos, int end_pos) {
DCHECK_GE(start_pos, 0);
DCHECK_LE(start_pos, end_pos);
DCHECK_LE(end_pos, data->length());
......@@ -629,20 +628,20 @@ Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
}
}
std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
std::unique_ptr<CharacterStream<uint16_t>> ScannerStream::ForTesting(
const char* data) {
return ScannerStream::ForTesting(data, strlen(data));
}
std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
std::unique_ptr<CharacterStream<uint16_t>> ScannerStream::ForTesting(
const char* data, size_t length) {
return std::unique_ptr<Utf16CharacterStream>(
return std::unique_ptr<CharacterStream<uint16_t>>(
new BufferedCharacterStream<uint8_t, ExternalStringStream>(
static_cast<size_t>(0), reinterpret_cast<const uint8_t*>(data),
static_cast<size_t>(length)));
}
Utf16CharacterStream* ScannerStream::For(
ScannerStream* ScannerStream::For(
ScriptCompiler::ExternalSourceStream* source_stream,
v8::ScriptCompiler::StreamedSource::Encoding encoding,
RuntimeCallStats* stats) {
......
......@@ -5,6 +5,8 @@
#ifndef V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
#define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
#include <algorithm>
#include "include/v8.h" // for v8::ScriptCompiler
#include "src/globals.h"
......@@ -13,24 +15,182 @@ namespace internal {
template <typename T>
class Handle;
class Utf16CharacterStream;
template <typename Char>
class CharacterStream;
class RuntimeCallStats;
class String;
// Abstract base class for scanner input streams. It declares the stream
// operations as pure virtual functions and hosts the static factory
// functions that create stream instances.
// NOTE(review): this listing shows both the Utf16CharacterStream-returning
// and the ScannerStream-returning For()/ForTesting() declarations; overloads
// that differ only in their return type cannot coexist, so only one set is
// expected in the real header -- confirm against the actual file.
class V8_EXPORT_PRIVATE ScannerStream {
public:
// Factories returning Utf16CharacterStream.
static Utf16CharacterStream* For(Isolate* isolate, Handle<String> data);
static Utf16CharacterStream* For(Isolate* isolate, Handle<String> data,
int start_pos, int end_pos);
static Utf16CharacterStream* For(
ScriptCompiler::ExternalSourceStream* source_stream,
ScriptCompiler::StreamedSource::Encoding encoding,
RuntimeCallStats* stats);
// Sentinel value used to signal that no more input is available.
static const uc32 kEndOfInput = -1;
// Factories returning the abstract ScannerStream: over a heap string, over a
// heap string with explicit start/end positions, or over an external source
// stream with a given encoding.
static ScannerStream* For(Isolate* isolate, Handle<String> data);
static ScannerStream* For(Isolate* isolate, Handle<String> data,
int start_pos, int end_pos);
static ScannerStream* For(ScriptCompiler::ExternalSourceStream* source_stream,
ScriptCompiler::StreamedSource::Encoding encoding,
RuntimeCallStats* stats);
// For testing:
static std::unique_ptr<Utf16CharacterStream> ForTesting(const char* data);
static std::unique_ptr<Utf16CharacterStream> ForTesting(const char* data,
size_t length);
static std::unique_ptr<CharacterStream<uint16_t>> ForTesting(
const char* data);
static std::unique_ptr<CharacterStream<uint16_t>> ForTesting(const char* data,
size_t length);
// Returns true if the stream could access the V8 heap after construction.
virtual bool can_access_heap() = 0;
// Stream operations; CharacterStream<Char> provides final implementations of
// the four functions below.
virtual uc32 Advance() = 0;
virtual void Seek(size_t pos) = 0;
virtual size_t pos() const = 0;
virtual void Back() = 0;
virtual ~ScannerStream() {}
};
// Buffered stream of code units of type |Char|.
//
// This class owns the cursor bookkeeping (buffer_start_ / buffer_cursor_ /
// buffer_end_ / buffer_pos_) and implements the ScannerStream operations on
// top of it. Subclasses provide the actual data by implementing ReadBlock().
template <typename Char>
class CharacterStream : public ScannerStream {
 public:
  // Returns and advances past the next code unit in the input stream. If
  // there are no more code units it returns kEndOfInput.
  inline uc32 Advance() final {
    if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
      return static_cast<uc32>(*(buffer_cursor_++));
    } else if (ReadBlockChecked()) {
      return static_cast<uc32>(*(buffer_cursor_++));
    } else {
      // Note: currently the following increment is necessary to avoid a
      // parser problem! The scanner treats the final kEndOfInput as
      // a code unit with a position, and does math relative to that
      // position.
      buffer_cursor_++;
      return kEndOfInput;
    }
  }

  // Returns and advances past the next code unit in the input stream that
  // satisfies |check|. If there are no more code units it returns
  // kEndOfInput.
  template <typename FunctionType>
  V8_INLINE uc32 AdvanceUntil(FunctionType check) {
    while (true) {
      auto next_cursor_pos =
          std::find_if(buffer_cursor_, buffer_end_, [&check](Char raw_c0) {
            uc32 c0 = static_cast<uc32>(raw_c0);
            return check(c0);
          });

      if (next_cursor_pos == buffer_end_) {
        // Nothing matched in the current buffer: consume it entirely and try
        // to fetch the next block.
        buffer_cursor_ = buffer_end_;
        if (!ReadBlockChecked()) {
          // Same end-of-input position adjustment as in Advance().
          buffer_cursor_++;
          return kEndOfInput;
        }
      } else {
        buffer_cursor_ = next_cursor_pos + 1;
        return static_cast<uc32>(*next_cursor_pos);
      }
    }
  }

  // Go back by one character in the input stream.
  // This undoes the most recent Advance().
  inline void Back() final {
    // The common case - if the previous character is within
    // buffer_start_ .. buffer_end_ - will be handled locally.
    // Otherwise, a new block is requested.
    if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
      buffer_cursor_--;
    } else {
      ReadBlockAt(pos() - 1);
    }
  }

  // Go back by two characters in the input stream. (This is the same as
  // calling Back() twice. But Back() may - in some instances - do substantial
  // work. Back2() guarantees this work will be done only once.)
  inline void Back2() {
    // Compare the distance from the buffer start instead of computing
    // |buffer_cursor_ - 2|, which would form a pointer before the start of
    // the buffer whenever fewer than two characters are buffered behind the
    // cursor.
    if (V8_LIKELY(buffer_cursor_ - buffer_start_ >= 2)) {
      buffer_cursor_ -= 2;
    } else {
      ReadBlockAt(pos() - 2);
    }
  }

  // Returns the current stream position: the position of the buffer start
  // plus the cursor's offset into the buffer.
  inline size_t pos() const final {
    return buffer_pos_ + (buffer_cursor_ - buffer_start_);
  }

  // Moves the stream to |pos|. If |pos| lies inside the currently buffered
  // range only the cursor is adjusted; otherwise a block is read at |pos|.
  inline void Seek(size_t pos) final {
    if (V8_LIKELY(pos >= buffer_pos_ &&
                  pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
      buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
    } else {
      ReadBlockAt(pos);
    }
  }

  // Returns true if the stream could access the V8 heap after construction.
  virtual bool can_access_heap() = 0;

 protected:
  // Initializes the stream with an existing buffer. The pointer types follow
  // the |Char| template parameter so that the class can be instantiated for
  // character types other than uint16_t.
  CharacterStream(const Char* buffer_start, const Char* buffer_cursor,
                  const Char* buffer_end, size_t buffer_pos)
      : buffer_start_(buffer_start),
        buffer_cursor_(buffer_cursor),
        buffer_end_(buffer_end),
        buffer_pos_(buffer_pos) {}
  CharacterStream() : CharacterStream(nullptr, nullptr, nullptr, 0) {}

  // Calls ReadBlock() and checks its post-conditions in debug builds.
  bool ReadBlockChecked() {
    size_t position = pos();
    USE(position);
    bool success = ReadBlock();

    // Post-conditions: 1, We should always be at the right position.
    //                  2, Cursor should be inside the buffer.
    //                  3, We should have more characters available iff success.
    DCHECK_EQ(pos(), position);
    DCHECK_LE(buffer_cursor_, buffer_end_);
    DCHECK_LE(buffer_start_, buffer_cursor_);
    DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
    return success;
  }

  // Repositions the stream at |new_pos| and reads the block containing it.
  void ReadBlockAt(size_t new_pos) {
    // The callers of this method (Back/Back2/Seek) should handle the easy
    // case (seeking within the current buffer), and we should only get here
    // if we actually require new data.
    // (This is really an efficiency check, not a correctness invariant.)
    DCHECK(new_pos < buffer_pos_ ||
           new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));

    // Change pos() to point to new_pos.
    buffer_pos_ = new_pos;
    buffer_cursor_ = buffer_start_;
    DCHECK_EQ(pos(), new_pos);
    ReadBlockChecked();
  }

  // Read more data, and update buffer_*_ to point to it.
  // Returns true if more data was available.
  //
  // ReadBlock() may modify any of the buffer_*_ members, but must ensure that
  // the result of pos() remains unaffected.
  //
  // Examples:
  // - a stream could either fill a separate buffer. Then buffer_start_ and
  //   buffer_cursor_ would point to the beginning of the buffer, and
  //   buffer_pos would be the old pos().
  // - a stream with existing buffer chunks would set buffer_start_ and
  //   buffer_end_ to cover the full chunk, and then buffer_cursor_ would
  //   point into the middle of the buffer, while buffer_pos_ would describe
  //   the start of the buffer.
  virtual bool ReadBlock() = 0;

  const Char* buffer_start_;   // First code unit of the current buffer.
  const Char* buffer_cursor_;  // Next code unit to be returned.
  const Char* buffer_end_;     // One past the last buffered code unit.
  size_t buffer_pos_;          // Stream position of buffer_start_.
};
} // namespace internal
......
......@@ -188,7 +188,7 @@ Scanner::Scanner(UnicodeCache* unicode_cache)
allow_harmony_bigint_(false),
allow_harmony_numeric_separator_(false) {}
void Scanner::Initialize(Utf16CharacterStream* source, bool is_module) {
void Scanner::Initialize(CharacterStream<uint16_t>* source, bool is_module) {
DCHECK_NOT_NULL(source);
source_ = source;
is_module_ = is_module;
......
......@@ -7,13 +7,12 @@
#ifndef V8_PARSING_SCANNER_H_
#define V8_PARSING_SCANNER_H_
#include <algorithm>
#include "src/allocation.h"
#include "src/base/logging.h"
#include "src/char-predicates.h"
#include "src/globals.h"
#include "src/messages.h"
#include "src/parsing/scanner-character-streams.h"
#include "src/parsing/token.h"
#include "src/unicode-decoder.h"
#include "src/unicode.h"
......@@ -30,161 +29,6 @@ class ExternalTwoByteString;
class ParserRecorder;
class UnicodeCache;
// ---------------------------------------------------------------------
// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
// A code unit is a 16 bit value representing either a 16 bit code point
// or one part of a surrogate pair that make a single 21 bit code point.
class Utf16CharacterStream {
public:
// Sentinel returned by Advance()/AdvanceUntil() when the input is exhausted.
static const uc32 kEndOfInput = -1;
virtual ~Utf16CharacterStream() { }
// Returns and advances past the next UTF-16 code unit in the input
// stream. If there are no more code units it returns kEndOfInput.
inline uc32 Advance() {
if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
return static_cast<uc32>(*(buffer_cursor_++));
} else if (ReadBlockChecked()) {
return static_cast<uc32>(*(buffer_cursor_++));
} else {
// Note: currently the following increment is necessary to avoid a
// parser problem! The scanner treats the final kEndOfInput as
// a code unit with a position, and does math relative to that
// position.
buffer_cursor_++;
return kEndOfInput;
}
}
// Returns and advances past the next UTF-16 code unit in the input stream
// that satisfies |check|. If there are no more code units it returns
// kEndOfInput.
template <typename FunctionType>
V8_INLINE uc32 AdvanceUntil(FunctionType check) {
while (true) {
auto next_cursor_pos =
std::find_if(buffer_cursor_, buffer_end_, [&check](uint16_t raw_c0_) {
uc32 c0_ = static_cast<uc32>(raw_c0_);
return check(c0_);
});
if (next_cursor_pos == buffer_end_) {
// Nothing matched in the current buffer: consume it entirely and try to
// fetch the next block.
buffer_cursor_ = buffer_end_;
if (!ReadBlockChecked()) {
// Same end-of-input position adjustment as in Advance().
buffer_cursor_++;
return kEndOfInput;
}
} else {
buffer_cursor_ = next_cursor_pos + 1;
return static_cast<uc32>(*next_cursor_pos);
}
}
}
// Go back by one character in the input stream.
// This undoes the most recent Advance().
inline void Back() {
// The common case - if the previous character is within
// buffer_start_ .. buffer_end_ - will be handled locally.
// Otherwise, a new block is requested.
if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
buffer_cursor_--;
} else {
ReadBlockAt(pos() - 1);
}
}
// Go back by two characters in the input stream. (This is the same as
// calling Back() twice. But Back() may - in some instances - do substantial
// work. Back2() guarantees this work will be done only once.)
inline void Back2() {
if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) {
buffer_cursor_ -= 2;
} else {
ReadBlockAt(pos() - 2);
}
}
// Returns the current stream position: the position of the buffer start plus
// the cursor's offset into the buffer.
inline size_t pos() const {
return buffer_pos_ + (buffer_cursor_ - buffer_start_);
}
// Moves the stream to |pos|. If |pos| lies inside the currently buffered
// range only the cursor is adjusted; otherwise a block is read at |pos|.
inline void Seek(size_t pos) {
if (V8_LIKELY(pos >= buffer_pos_ &&
pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
} else {
ReadBlockAt(pos);
}
}
// Returns true if the stream could access the V8 heap after construction.
virtual bool can_access_heap() = 0;
protected:
// Initializes the stream with an existing buffer and the stream position
// that corresponds to its start.
Utf16CharacterStream(const uint16_t* buffer_start,
const uint16_t* buffer_cursor,
const uint16_t* buffer_end, size_t buffer_pos)
: buffer_start_(buffer_start),
buffer_cursor_(buffer_cursor),
buffer_end_(buffer_end),
buffer_pos_(buffer_pos) {}
Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
// Calls ReadBlock() and checks its post-conditions in debug builds.
bool ReadBlockChecked() {
size_t position = pos();
USE(position);
bool success = ReadBlock();
// Post-conditions: 1, We should always be at the right position.
// 2, Cursor should be inside the buffer.
// 3, We should have more characters available iff success.
DCHECK_EQ(pos(), position);
DCHECK_LE(buffer_cursor_, buffer_end_);
DCHECK_LE(buffer_start_, buffer_cursor_);
DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
return success;
}
// Repositions the stream at |new_pos| and reads the block containing it.
void ReadBlockAt(size_t new_pos) {
// The callers of this method (Back/Back2/Seek) should handle the easy
// case (seeking within the current buffer), and we should only get here
// if we actually require new data.
// (This is really an efficiency check, not a correctness invariant.)
DCHECK(new_pos < buffer_pos_ ||
new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
// Change pos() to point to new_pos.
buffer_pos_ = new_pos;
buffer_cursor_ = buffer_start_;
DCHECK_EQ(pos(), new_pos);
ReadBlockChecked();
}
// Read more data, and update buffer_*_ to point to it.
// Returns true if more data was available.
//
// ReadBlock() may modify any of the buffer_*_ members, but must ensure that
// the result of pos() remains unaffected.
//
// Examples:
// - a stream could either fill a separate buffer. Then buffer_start_ and
// buffer_cursor_ would point to the beginning of the buffer, and
// buffer_pos would be the old pos().
// - a stream with existing buffer chunks would set buffer_start_ and
// buffer_end_ to cover the full chunk, and then buffer_cursor_ would
// point into the middle of the buffer, while buffer_pos_ would describe
// the start of the buffer.
virtual bool ReadBlock() = 0;
// First code unit of the current buffer.
const uint16_t* buffer_start_;
// Next code unit to be returned.
const uint16_t* buffer_cursor_;
// One past the last buffered code unit.
const uint16_t* buffer_end_;
// Stream position of buffer_start_.
size_t buffer_pos_;
};
// ----------------------------------------------------------------------------
// JavaScript Scanner.
......@@ -232,11 +76,11 @@ class Scanner {
// -1 is outside of the range of any real source code.
static const int kNoOctalLocation = -1;
static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
static const uc32 kEndOfInput = ScannerStream::kEndOfInput;
explicit Scanner(UnicodeCache* scanner_contants);
void Initialize(Utf16CharacterStream* source, bool is_module);
void Initialize(CharacterStream<uint16_t>* source, bool is_module);
// Returns the next token and advances input.
Token::Value Next();
......@@ -847,8 +691,8 @@ class Scanner {
TokenDesc next_; // desc for next token (one token look-ahead)
TokenDesc next_next_; // desc for the token after next (after PeakAhead())
// Input stream. Must be initialized to an Utf16CharacterStream.
Utf16CharacterStream* source_;
// Input stream. Must be initialized to a CharacterStream.
CharacterStream<uint16_t>* source_;
// Last-seen positions of potentially problematic tokens.
Location octal_pos_;
......
This diff is collapsed.
......@@ -27,7 +27,7 @@ struct ScannerTestHelper {
scanner(std::move(other.scanner)) {}
std::unique_ptr<UnicodeCache> unicode_cache;
std::unique_ptr<Utf16CharacterStream> stream;
std::unique_ptr<CharacterStream<uint16_t>> stream;
std::unique_ptr<Scanner> scanner;
Scanner* operator->() const { return scanner.get(); }
......
......@@ -396,8 +396,7 @@ TEST(PreParseOverflow) {
CHECK_EQ(i::PreParser::kPreParseStackOverflow, result);
}
void TestStreamScanner(i::Utf16CharacterStream* stream,
void TestStreamScanner(i::CharacterStream<uint16_t>* stream,
i::Token::Value* expected_tokens,
int skip_pos = 0, // Zero means not skipping.
int skip_to = 0) {
......@@ -420,8 +419,7 @@ void TestStreamScanner(i::Utf16CharacterStream* stream,
TEST(StreamScanner) {
v8::V8::Initialize();
const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
std::unique_ptr<i::Utf16CharacterStream> stream1(
i::ScannerStream::ForTesting(str1));
auto stream1(i::ScannerStream::ForTesting(str1));
i::Token::Value expectations1[] = {
i::Token::LBRACE,
i::Token::IDENTIFIER,
......@@ -439,8 +437,7 @@ TEST(StreamScanner) {
TestStreamScanner(stream1.get(), expectations1, 0, 0);
const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
std::unique_ptr<i::Utf16CharacterStream> stream2(
i::ScannerStream::ForTesting(str2));
auto stream2(i::ScannerStream::ForTesting(str2));
i::Token::Value expectations2[] = {
i::Token::CASE,
i::Token::DEFAULT,
......@@ -470,8 +467,7 @@ TEST(StreamScanner) {
for (int i = 0; i <= 4; i++) {
expectations3[6 - i] = i::Token::ILLEGAL;
expectations3[5 - i] = i::Token::EOS;
std::unique_ptr<i::Utf16CharacterStream> stream3(
i::ScannerStream::ForTesting(str3));
auto stream3(i::ScannerStream::ForTesting(str3));
TestStreamScanner(stream3.get(), expectations3, 1, 1 + i);
}
}
......@@ -1159,6 +1155,7 @@ void SetParserFlags(i::PreParser* parser, i::EnumSet<ParserFlag> flags) {
flags.Contains(kAllowHarmonyNumericSeparator));
}
template <typename Char>
void TestParserSyncWithFlags(i::Handle<i::String> source,
i::EnumSet<ParserFlag> flags,
ParserSyncTestResult result,
......@@ -1173,7 +1170,7 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
i::PendingCompilationErrorHandler pending_error_handler;
if (test_preparser) {
i::Scanner scanner(isolate->unicode_cache());
std::unique_ptr<i::Utf16CharacterStream> stream(
std::unique_ptr<i::ScannerStream> stream(
i::ScannerStream::For(isolate, source));
i::Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
i::AstValueFactory ast_value_factory(
......@@ -1184,7 +1181,8 @@ void TestParserSyncWithFlags(i::Handle<i::String> source,
isolate->counters()->runtime_call_stats(),
isolate->logger(), -1, is_module);
SetParserFlags(&preparser, flags);
scanner.Initialize(stream.get(), is_module);
scanner.Initialize(static_cast<CharacterStream<Char>*>(stream.get()),
is_module);
i::PreParser::PreParseResult result = preparser.PreParseProgram();
CHECK_EQ(i::PreParser::kPreParseSuccess, result);
}
......@@ -1294,8 +1292,15 @@ void TestParserSync(const char* source, const ParserFlag* varying_flags,
++flag_index) {
flags.Remove(always_false_flags[flag_index]);
}
TestParserSyncWithFlags(str, flags, result, is_module, test_preparser,
ignore_error_msg);
if (str->IsSeqOneByteString()) {
// TODO(verwaest): Switch to uint8_t.
TestParserSyncWithFlags<uint16_t>(str, flags, result, is_module,
test_preparser, ignore_error_msg);
} else {
DCHECK(str->IsSeqTwoByteString());
TestParserSyncWithFlags<uint16_t>(str, flags, result, is_module,
test_preparser, ignore_error_msg);
}
}
}
......
......@@ -17,7 +17,8 @@ class AsmJsScannerTest : public ::testing::Test {
protected:
void SetupScanner(const char* source) {
stream = ScannerStream::ForTesting(source);
scanner.reset(new AsmJsScanner(stream.get()));
scanner.reset(new AsmJsScanner(
static_cast<CharacterStream<uint16_t>*>(stream.get()), 0));
}
void Skip(AsmJsScanner::token_t t) {
......@@ -41,7 +42,7 @@ class AsmJsScannerTest : public ::testing::Test {
CHECK_EQ(scanner->Token(), AsmJsScanner::kParseError);
}
std::unique_ptr<Utf16CharacterStream> stream;
std::unique_ptr<ScannerStream> stream;
std::unique_ptr<AsmJsScanner> scanner;
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment