Commit fca7b2ce authored by sgjesse@chromium.org

Refactor the scanner interface

No need to create substrings for lazy compiles. The scanner will start from the start position provided.

Moved the creation of character streams into the scanner where possible. This uses an input buffer in the scanner class instead of a stack-allocated one.

Added a UTF16 buffer for reading external ASCII strings (by templating the external two-byte string one), as all the source for the builtins is exposed as external ASCII strings.
Review URL: http://codereview.chromium.org/661367

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4007 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 30a8fc3e
...@@ -5248,7 +5248,7 @@ Object* JSObject::SetSlowElements(Object* len) { ...@@ -5248,7 +5248,7 @@ Object* JSObject::SetSlowElements(Object* len) {
case DICTIONARY_ELEMENTS: { case DICTIONARY_ELEMENTS: {
if (IsJSArray()) { if (IsJSArray()) {
uint32_t old_length = uint32_t old_length =
static_cast<uint32_t>(JSArray::cast(this)->length()->Number()); static_cast<uint32_t>(JSArray::cast(this)->length()->Number());
element_dictionary()->RemoveNumberEntries(new_length, old_length), element_dictionary()->RemoveNumberEntries(new_length, old_length),
JSArray::cast(this)->set_length(len); JSArray::cast(this)->set_length(len);
} }
......
...@@ -107,13 +107,13 @@ class Parser { ...@@ -107,13 +107,13 @@ class Parser {
// Returns NULL if parsing failed. // Returns NULL if parsing failed.
FunctionLiteral* ParseProgram(Handle<String> source, FunctionLiteral* ParseProgram(Handle<String> source,
unibrow::CharacterStream* stream,
bool in_global_context); bool in_global_context);
FunctionLiteral* ParseLazy(Handle<String> source, FunctionLiteral* ParseLazy(Handle<String> source,
Handle<String> name, Handle<String> name,
int start_position, bool is_expression); int start_position,
FunctionLiteral* ParseJson(Handle<String> source, int end_position,
unibrow::CharacterStream* stream); bool is_expression);
FunctionLiteral* ParseJson(Handle<String> source);
// The minimum number of contiguous assignment that will // The minimum number of contiguous assignment that will
// be treated as an initialization block. Benchmarks show that // be treated as an initialization block. Benchmarks show that
...@@ -1212,7 +1212,7 @@ bool Parser::PreParseProgram(Handle<String> source, ...@@ -1212,7 +1212,7 @@ bool Parser::PreParseProgram(Handle<String> source,
AssertNoZoneAllocation assert_no_zone_allocation; AssertNoZoneAllocation assert_no_zone_allocation;
AssertNoAllocation assert_no_allocation; AssertNoAllocation assert_no_allocation;
NoHandleAllocation no_handle_allocation; NoHandleAllocation no_handle_allocation;
scanner_.Init(source, stream, 0, JAVASCRIPT); scanner_.Initialize(source, stream, JAVASCRIPT);
ASSERT(target_stack_ == NULL); ASSERT(target_stack_ == NULL);
mode_ = PARSE_EAGERLY; mode_ = PARSE_EAGERLY;
DummyScope top_scope; DummyScope top_scope;
...@@ -1226,7 +1226,6 @@ bool Parser::PreParseProgram(Handle<String> source, ...@@ -1226,7 +1226,6 @@ bool Parser::PreParseProgram(Handle<String> source,
FunctionLiteral* Parser::ParseProgram(Handle<String> source, FunctionLiteral* Parser::ParseProgram(Handle<String> source,
unibrow::CharacterStream* stream,
bool in_global_context) { bool in_global_context) {
CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT); CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT);
...@@ -1235,7 +1234,7 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source, ...@@ -1235,7 +1234,7 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source,
// Initialize parser state. // Initialize parser state.
source->TryFlatten(); source->TryFlatten();
scanner_.Init(source, stream, 0, JAVASCRIPT); scanner_.Initialize(source, JAVASCRIPT);
ASSERT(target_stack_ == NULL); ASSERT(target_stack_ == NULL);
// Compute the parsing mode. // Compute the parsing mode.
...@@ -1286,15 +1285,15 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source, ...@@ -1286,15 +1285,15 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source,
FunctionLiteral* Parser::ParseLazy(Handle<String> source, FunctionLiteral* Parser::ParseLazy(Handle<String> source,
Handle<String> name, Handle<String> name,
int start_position, int start_position,
int end_position,
bool is_expression) { bool is_expression) {
CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT); CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT);
HistogramTimerScope timer(&Counters::parse_lazy); HistogramTimerScope timer(&Counters::parse_lazy);
source->TryFlatten();
Counters::total_parse_size.Increment(source->length()); Counters::total_parse_size.Increment(source->length());
SafeStringInputBuffer buffer(source.location());
// Initialize parser state. // Initialize parser state.
scanner_.Init(source, &buffer, start_position, JAVASCRIPT); source->TryFlatten();
scanner_.Initialize(source, start_position, end_position, JAVASCRIPT);
ASSERT(target_stack_ == NULL); ASSERT(target_stack_ == NULL);
mode_ = PARSE_EAGERLY; mode_ = PARSE_EAGERLY;
...@@ -1330,8 +1329,7 @@ FunctionLiteral* Parser::ParseLazy(Handle<String> source, ...@@ -1330,8 +1329,7 @@ FunctionLiteral* Parser::ParseLazy(Handle<String> source,
return result; return result;
} }
FunctionLiteral* Parser::ParseJson(Handle<String> source, FunctionLiteral* Parser::ParseJson(Handle<String> source) {
unibrow::CharacterStream* stream) {
CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT); CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT);
HistogramTimerScope timer(&Counters::parse); HistogramTimerScope timer(&Counters::parse);
...@@ -1339,7 +1337,7 @@ FunctionLiteral* Parser::ParseJson(Handle<String> source, ...@@ -1339,7 +1337,7 @@ FunctionLiteral* Parser::ParseJson(Handle<String> source,
// Initialize parser state. // Initialize parser state.
source->TryFlatten(TENURED); source->TryFlatten(TENURED);
scanner_.Init(source, stream, 0, JSON); scanner_.Initialize(source, JSON);
ASSERT(target_stack_ == NULL); ASSERT(target_stack_ == NULL);
FunctionLiteral* result = NULL; FunctionLiteral* result = NULL;
...@@ -5065,13 +5063,12 @@ FunctionLiteral* MakeAST(bool compile_in_global_context, ...@@ -5065,13 +5063,12 @@ FunctionLiteral* MakeAST(bool compile_in_global_context,
return NULL; return NULL;
} }
Handle<String> source = Handle<String>(String::cast(script->source())); Handle<String> source = Handle<String>(String::cast(script->source()));
SafeStringInputBuffer input(source.location());
FunctionLiteral* result; FunctionLiteral* result;
if (is_json) { if (is_json) {
ASSERT(compile_in_global_context); ASSERT(compile_in_global_context);
result = parser.ParseJson(source, &input); result = parser.ParseJson(source);
} else { } else {
result = parser.ParseProgram(source, &input, compile_in_global_context); result = parser.ParseProgram(source, compile_in_global_context);
} }
return result; return result;
} }
...@@ -5086,12 +5083,11 @@ FunctionLiteral* MakeLazyAST(Handle<Script> script, ...@@ -5086,12 +5083,11 @@ FunctionLiteral* MakeLazyAST(Handle<Script> script,
always_allow_natives_syntax = true; always_allow_natives_syntax = true;
AstBuildingParser parser(script, true, NULL, NULL); // always allow AstBuildingParser parser(script, true, NULL, NULL); // always allow
always_allow_natives_syntax = allow_natives_syntax_before; always_allow_natives_syntax = allow_natives_syntax_before;
// Parse the function by pulling the function source from the script source. // Parse the function by pointing to the function source in the script source.
Handle<String> script_source(String::cast(script->source())); Handle<String> script_source(String::cast(script->source()));
Handle<String> function_source =
SubString(script_source, start_position, end_position, TENURED);
FunctionLiteral* result = FunctionLiteral* result =
parser.ParseLazy(function_source, name, start_position, is_expression); parser.ParseLazy(script_source, name,
start_position, end_position, is_expression);
return result; return result;
} }
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "v8.h" #include "v8.h"
#include "ast.h" #include "ast.h"
#include "handles.h"
#include "scanner.h" #include "scanner.h"
namespace v8 { namespace v8 {
...@@ -86,12 +87,7 @@ void UTF8Buffer::AddCharSlow(uc32 c) { ...@@ -86,12 +87,7 @@ void UTF8Buffer::AddCharSlow(uc32 c) {
UTF16Buffer::UTF16Buffer() UTF16Buffer::UTF16Buffer()
: pos_(0), size_(0) { } : pos_(0), end_(Scanner::kNoEndPosition) { }
Handle<String> UTF16Buffer::SubString(int start, int end) {
return internal::SubString(data_, start, end);
}
// CharacterStreamUTF16Buffer // CharacterStreamUTF16Buffer
...@@ -100,10 +96,14 @@ CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() ...@@ -100,10 +96,14 @@ CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
unibrow::CharacterStream* input) { unibrow::CharacterStream* input,
data_ = data; int start_position,
pos_ = 0; int end_position) {
stream_ = input; stream_ = input;
if (start_position > 0) {
SeekForward(start_position);
}
end_ = end_position != Scanner::kNoEndPosition ? end_position : kMaxInt;
} }
...@@ -115,6 +115,8 @@ void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { ...@@ -115,6 +115,8 @@ void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
uc32 CharacterStreamUTF16Buffer::Advance() { uc32 CharacterStreamUTF16Buffer::Advance() {
ASSERT(end_ != Scanner::kNoEndPosition);
ASSERT(end_ >= 0);
// NOTE: It is of importance to Persian / Farsi resources that we do // NOTE: It is of importance to Persian / Farsi resources that we do
// *not* strip format control characters in the scanner; see // *not* strip format control characters in the scanner; see
// //
...@@ -126,7 +128,7 @@ uc32 CharacterStreamUTF16Buffer::Advance() { ...@@ -126,7 +128,7 @@ uc32 CharacterStreamUTF16Buffer::Advance() {
if (!pushback_buffer()->is_empty()) { if (!pushback_buffer()->is_empty()) {
pos_++; pos_++;
return last_ = pushback_buffer()->RemoveLast(); return last_ = pushback_buffer()->RemoveLast();
} else if (stream_->has_more()) { } else if (stream_->has_more() && pos_ < end_) {
pos_++; pos_++;
uc32 next = stream_->GetNext(); uc32 next = stream_->GetNext();
return last_ = next; return last_ = next;
...@@ -146,25 +148,32 @@ void CharacterStreamUTF16Buffer::SeekForward(int pos) { ...@@ -146,25 +148,32 @@ void CharacterStreamUTF16Buffer::SeekForward(int pos) {
} }
// TwoByteStringUTF16Buffer // ExternalStringUTF16Buffer
TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() template <typename StringType, typename CharType>
ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
: raw_data_(NULL) { } : raw_data_(NULL) { }
void TwoByteStringUTF16Buffer::Initialize( template <typename StringType, typename CharType>
Handle<ExternalTwoByteString> data) { void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
Handle<StringType> data,
int start_position,
int end_position) {
ASSERT(!data.is_null()); ASSERT(!data.is_null());
data_ = data;
pos_ = 0;
raw_data_ = data->resource()->data(); raw_data_ = data->resource()->data();
size_ = data->length();
ASSERT(end_position <= data->length());
if (start_position > 0) {
SeekForward(start_position);
}
end_ =
end_position != Scanner::kNoEndPosition ? end_position : data->length();
} }
uc32 TwoByteStringUTF16Buffer::Advance() { template <typename StringType, typename CharType>
if (pos_ < size_) { uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
if (pos_ < end_) {
return raw_data_[pos_++]; return raw_data_[pos_++];
} else { } else {
// note: currently the following increment is necessary to avoid a // note: currently the following increment is necessary to avoid a
...@@ -175,14 +184,16 @@ uc32 TwoByteStringUTF16Buffer::Advance() { ...@@ -175,14 +184,16 @@ uc32 TwoByteStringUTF16Buffer::Advance() {
} }
void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { template <typename StringType, typename CharType>
void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
pos_--; pos_--;
ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
} }
void TwoByteStringUTF16Buffer::SeekForward(int pos) { template <typename StringType, typename CharType>
void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
pos_ = pos; pos_ = pos;
} }
...@@ -327,21 +338,56 @@ Scanner::Scanner(ParserMode pre) ...@@ -327,21 +338,56 @@ Scanner::Scanner(ParserMode pre)
: stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { } : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { }
void Scanner::Initialize(Handle<String> source,
ParserLanguage language) {
safe_string_input_buffer_.Reset(source.location());
Init(source, &safe_string_input_buffer_, 0, source->length(), language);
}
void Scanner::Initialize(Handle<String> source,
unibrow::CharacterStream* stream,
ParserLanguage language) {
Init(source, stream, 0, kNoEndPosition, language);
}
void Scanner::Initialize(Handle<String> source,
int start_position,
int end_position,
ParserLanguage language) {
safe_string_input_buffer_.Reset(source.location());
Init(source, &safe_string_input_buffer_,
start_position, end_position, language);
}
void Scanner::Init(Handle<String> source, void Scanner::Init(Handle<String> source,
unibrow::CharacterStream* stream, unibrow::CharacterStream* stream,
int position, int start_position,
int end_position,
ParserLanguage language) { ParserLanguage language) {
// Initialize the source buffer. // Initialize the source buffer.
if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
two_byte_string_buffer_.Initialize( two_byte_string_buffer_.Initialize(
Handle<ExternalTwoByteString>::cast(source)); Handle<ExternalTwoByteString>::cast(source),
start_position,
end_position);
source_ = &two_byte_string_buffer_; source_ = &two_byte_string_buffer_;
} else if (!source.is_null() && StringShape(*source).IsExternalAscii()) {
ascii_string_buffer_.Initialize(
Handle<ExternalAsciiString>::cast(source),
start_position,
end_position);
source_ = &ascii_string_buffer_;
} else { } else {
char_stream_buffer_.Initialize(source, stream); char_stream_buffer_.Initialize(source,
stream,
start_position,
end_position);
source_ = &char_stream_buffer_; source_ = &char_stream_buffer_;
} }
position_ = position;
is_parsing_json_ = (language == JSON); is_parsing_json_ = (language == JSON);
// Set c0_ (one character ahead) // Set c0_ (one character ahead)
...@@ -358,11 +404,6 @@ void Scanner::Init(Handle<String> source, ...@@ -358,11 +404,6 @@ void Scanner::Init(Handle<String> source,
} }
Handle<String> Scanner::SubString(int start, int end) {
return source_->SubString(start - position_, end - position_);
}
Token::Value Scanner::Next() { Token::Value Scanner::Next() {
// BUG 1215673: Find a thread safe way to set a stack limit in // BUG 1215673: Find a thread safe way to set a stack limit in
// pre-parse mode. Otherwise, we cannot safely pre-parse from other // pre-parse mode. Otherwise, we cannot safely pre-parse from other
......
...@@ -84,32 +84,34 @@ class UTF8Buffer { ...@@ -84,32 +84,34 @@ class UTF8Buffer {
}; };
// Interface through which the scanner reads characters from the input source.
class UTF16Buffer { class UTF16Buffer {
public: public:
UTF16Buffer(); UTF16Buffer();
virtual ~UTF16Buffer() {} virtual ~UTF16Buffer() {}
virtual void PushBack(uc32 ch) = 0; virtual void PushBack(uc32 ch) = 0;
// returns a value < 0 when the buffer end is reached // Returns a value < 0 when the buffer end is reached.
virtual uc32 Advance() = 0; virtual uc32 Advance() = 0;
virtual void SeekForward(int pos) = 0; virtual void SeekForward(int pos) = 0;
int pos() const { return pos_; } int pos() const { return pos_; }
int size() const { return size_; }
Handle<String> SubString(int start, int end);
protected: protected:
Handle<String> data_; int pos_; // Current position in the buffer.
int pos_; int end_; // Position where scanning should stop (EOF).
int size_;
}; };
// UTF16 buffer to read characters from a character stream.
class CharacterStreamUTF16Buffer: public UTF16Buffer { class CharacterStreamUTF16Buffer: public UTF16Buffer {
public: public:
CharacterStreamUTF16Buffer(); CharacterStreamUTF16Buffer();
virtual ~CharacterStreamUTF16Buffer() {} virtual ~CharacterStreamUTF16Buffer() {}
void Initialize(Handle<String> data, unibrow::CharacterStream* stream); void Initialize(Handle<String> data,
unibrow::CharacterStream* stream,
int start_position,
int end_position);
virtual void PushBack(uc32 ch); virtual void PushBack(uc32 ch);
virtual uc32 Advance(); virtual uc32 Advance();
virtual void SeekForward(int pos); virtual void SeekForward(int pos);
...@@ -123,17 +125,21 @@ class CharacterStreamUTF16Buffer: public UTF16Buffer { ...@@ -123,17 +125,21 @@ class CharacterStreamUTF16Buffer: public UTF16Buffer {
}; };
class TwoByteStringUTF16Buffer: public UTF16Buffer { // UTF16 buffer to read characters from an external string.
template <typename StringType, typename CharType>
class ExternalStringUTF16Buffer: public UTF16Buffer {
public: public:
TwoByteStringUTF16Buffer(); ExternalStringUTF16Buffer();
virtual ~TwoByteStringUTF16Buffer() {} virtual ~ExternalStringUTF16Buffer() {}
void Initialize(Handle<ExternalTwoByteString> data); void Initialize(Handle<StringType> data,
int start_position,
int end_position);
virtual void PushBack(uc32 ch); virtual void PushBack(uc32 ch);
virtual uc32 Advance(); virtual uc32 Advance();
virtual void SeekForward(int pos); virtual void SeekForward(int pos);
private: private:
const uint16_t* raw_data_; const CharType* raw_data_; // Pointer to the actual array of characters.
}; };
...@@ -263,11 +269,15 @@ class Scanner { ...@@ -263,11 +269,15 @@ class Scanner {
// Construction // Construction
explicit Scanner(ParserMode parse_mode); explicit Scanner(ParserMode parse_mode);
// Initialize the Scanner to scan source: // Initialize the Scanner to scan source.
void Init(Handle<String> source, void Initialize(Handle<String> source,
unibrow::CharacterStream* stream, ParserLanguage language);
int position, void Initialize(Handle<String> source,
ParserLanguage language); unibrow::CharacterStream* stream,
ParserLanguage language);
void Initialize(Handle<String> source,
int start_position, int end_position,
ParserLanguage language);
// Returns the next token. // Returns the next token.
Token::Value Next(); Token::Value Next();
...@@ -335,7 +345,6 @@ class Scanner { ...@@ -335,7 +345,6 @@ class Scanner {
// tokens, which is what it is used for. // tokens, which is what it is used for.
void SeekForward(int pos); void SeekForward(int pos);
Handle<String> SubString(int start_pos, int end_pos);
bool stack_overflow() { return stack_overflow_; } bool stack_overflow() { return stack_overflow_; }
static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; } static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }
...@@ -350,14 +359,28 @@ class Scanner { ...@@ -350,14 +359,28 @@ class Scanner {
static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
static const int kCharacterLookaheadBufferSize = 1; static const int kCharacterLookaheadBufferSize = 1;
static const int kNoEndPosition = 1;
private: private:
void Init(Handle<String> source,
unibrow::CharacterStream* stream,
int start_position, int end_position,
ParserLanguage language);
// Different UTF16 buffers used to pull characters from. Based on input one of
// these will be initialized as the actual data source.
CharacterStreamUTF16Buffer char_stream_buffer_; CharacterStreamUTF16Buffer char_stream_buffer_;
TwoByteStringUTF16Buffer two_byte_string_buffer_; ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
two_byte_string_buffer_;
ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
// Source. // Source. Will point to one of the buffers declared above.
UTF16Buffer* source_; UTF16Buffer* source_;
int position_;
// Used to convert the source string into a character stream when a stream
// is not passed to the scanner.
SafeStringInputBuffer safe_string_input_buffer_;
// Buffer to hold literal values (identifiers, strings, numbers) // Buffer to hold literal values (identifiers, strings, numbers)
// using 0-terminated UTF-8 encoding. // using 0-terminated UTF-8 encoding.
...@@ -460,7 +483,7 @@ class Scanner { ...@@ -460,7 +483,7 @@ class Scanner {
// Return the current source position. // Return the current source position.
int source_pos() { int source_pos() {
return source_->pos() - kCharacterLookaheadBufferSize + position_; return source_->pos() - kCharacterLookaheadBufferSize;
} }
// Decodes a unicode escape-sequence which is part of an identifier. // Decodes a unicode escape-sequence which is part of an identifier.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment