Commit 1c70072f authored by feng@chromium.org

Streamline the scanner for external two byte string input.

Review URL: http://codereview.chromium.org/165403

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@2703 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 33fb11c1
...@@ -1046,7 +1046,7 @@ void ObjectTemplate::SetInternalFieldCount(int value) { ...@@ -1046,7 +1046,7 @@ void ObjectTemplate::SetInternalFieldCount(int value) {
ScriptData* ScriptData::PreCompile(const char* input, int length) { ScriptData* ScriptData::PreCompile(const char* input, int length) {
unibrow::Utf8InputBuffer<> buf(input, length); unibrow::Utf8InputBuffer<> buf(input, length);
return i::PreParse(&buf, NULL); return i::PreParse(i::Handle<i::String>(), &buf, NULL);
} }
......
...@@ -266,7 +266,7 @@ Handle<JSFunction> Compiler::Compile(Handle<String> source, ...@@ -266,7 +266,7 @@ Handle<JSFunction> Compiler::Compile(Handle<String> source,
if (pre_data == NULL && source_length >= FLAG_min_preparse_length) { if (pre_data == NULL && source_length >= FLAG_min_preparse_length) {
Access<SafeStringInputBuffer> buf(&safe_string_input_buffer); Access<SafeStringInputBuffer> buf(&safe_string_input_buffer);
buf->Reset(source.location()); buf->Reset(source.location());
pre_data = PreParse(buf.value(), extension); pre_data = PreParse(source, buf.value(), extension);
} }
// Create a script object describing the script to be compiled. // Create a script object describing the script to be compiled.
......
...@@ -87,8 +87,10 @@ Handle<String> Factory::NewStringFromUtf8(Vector<const char> string, ...@@ -87,8 +87,10 @@ Handle<String> Factory::NewStringFromUtf8(Vector<const char> string,
} }
Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string) { Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string,
CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string), String); PretenureFlag pretenure) {
CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string, pretenure),
String);
} }
......
...@@ -92,7 +92,8 @@ class Factory : public AllStatic { ...@@ -92,7 +92,8 @@ class Factory : public AllStatic {
Vector<const char> str, Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED); PretenureFlag pretenure = NOT_TENURED);
static Handle<String> NewStringFromTwoByte(Vector<const uc16> str); static Handle<String> NewStringFromTwoByte(Vector<const uc16> str,
PretenureFlag pretenure = NOT_TENURED);
// Allocates and partially initializes a TwoByte String. The characters of // Allocates and partially initializes a TwoByte String. The characters of
// the string are uninitialized. Currently used in regexp code only, where // the string are uninitialized. Currently used in regexp code only, where
......
...@@ -97,7 +97,7 @@ class Parser { ...@@ -97,7 +97,7 @@ class Parser {
// Pre-parse the program from the character stream; returns true on // Pre-parse the program from the character stream; returns true on
// success, false if a stack-overflow happened during parsing. // success, false if a stack-overflow happened during parsing.
bool PreParseProgram(unibrow::CharacterStream* stream); bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream);
void ReportMessage(const char* message, Vector<const char*> args); void ReportMessage(const char* message, Vector<const char*> args);
virtual void ReportMessageAt(Scanner::Location loc, virtual void ReportMessageAt(Scanner::Location loc,
...@@ -1167,13 +1167,14 @@ Parser::Parser(Handle<Script> script, ...@@ -1167,13 +1167,14 @@ Parser::Parser(Handle<Script> script,
} }
bool Parser::PreParseProgram(unibrow::CharacterStream* stream) { bool Parser::PreParseProgram(Handle<String> source,
unibrow::CharacterStream* stream) {
HistogramTimerScope timer(&Counters::pre_parse); HistogramTimerScope timer(&Counters::pre_parse);
StackGuard guard; StackGuard guard;
AssertNoZoneAllocation assert_no_zone_allocation; AssertNoZoneAllocation assert_no_zone_allocation;
AssertNoAllocation assert_no_allocation; AssertNoAllocation assert_no_allocation;
NoHandleAllocation no_handle_allocation; NoHandleAllocation no_handle_allocation;
scanner_.Init(Handle<String>(), stream, 0); scanner_.Init(source, stream, 0);
ASSERT(target_stack_ == NULL); ASSERT(target_stack_ == NULL);
mode_ = PARSE_EAGERLY; mode_ = PARSE_EAGERLY;
DummyScope top_scope; DummyScope top_scope;
...@@ -4593,7 +4594,8 @@ unsigned* ScriptDataImpl::Data() { ...@@ -4593,7 +4594,8 @@ unsigned* ScriptDataImpl::Data() {
} }
ScriptDataImpl* PreParse(unibrow::CharacterStream* stream, ScriptDataImpl* PreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) { v8::Extension* extension) {
Handle<Script> no_script; Handle<Script> no_script;
bool allow_natives_syntax = bool allow_natives_syntax =
...@@ -4601,7 +4603,7 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream, ...@@ -4601,7 +4603,7 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
FLAG_allow_natives_syntax || FLAG_allow_natives_syntax ||
Bootstrapper::IsActive(); Bootstrapper::IsActive();
PreParser parser(no_script, allow_natives_syntax, extension); PreParser parser(no_script, allow_natives_syntax, extension);
if (!parser.PreParseProgram(stream)) return NULL; if (!parser.PreParseProgram(source, stream)) return NULL;
// The list owns the backing store so we need to clone the vector. // The list owns the backing store so we need to clone the vector.
// That way, the result will be exactly the right size rather than // That way, the result will be exactly the right size rather than
// the expected 50% too large. // the expected 50% too large.
......
...@@ -143,7 +143,8 @@ FunctionLiteral* MakeAST(bool compile_in_global_context, ...@@ -143,7 +143,8 @@ FunctionLiteral* MakeAST(bool compile_in_global_context,
ScriptDataImpl* pre_data); ScriptDataImpl* pre_data);
ScriptDataImpl* PreParse(unibrow::CharacterStream* stream, ScriptDataImpl* PreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension); v8::Extension* extension);
......
...@@ -92,33 +92,35 @@ void UTF8Buffer::AddCharSlow(uc32 c) { ...@@ -92,33 +92,35 @@ void UTF8Buffer::AddCharSlow(uc32 c) {
UTF16Buffer::UTF16Buffer() UTF16Buffer::UTF16Buffer()
: pos_(0), : pos_(0), size_(0) { }
pushback_buffer_(0),
last_(0),
stream_(NULL) { }
void UTF16Buffer::Initialize(Handle<String> data, Handle<String> UTF16Buffer::SubString(int start, int end) {
unibrow::CharacterStream* input) { return internal::SubString(data_, start, end);
data_ = data;
pos_ = 0;
stream_ = input;
} }
Handle<String> UTF16Buffer::SubString(int start, int end) { // CharacterStreamUTF16Buffer
return internal::SubString(data_, start, end); CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
: pushback_buffer_(0), last_(0), stream_(NULL) { }
void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
unibrow::CharacterStream* input) {
data_ = data;
pos_ = 0;
stream_ = input;
} }
void UTF16Buffer::PushBack(uc32 ch) { void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
pushback_buffer()->Add(last_); pushback_buffer()->Add(last_);
last_ = ch; last_ = ch;
pos_--; pos_--;
} }
uc32 UTF16Buffer::Advance() { uc32 CharacterStreamUTF16Buffer::Advance() {
// NOTE: It is of importance to Persian / Farsi resources that we do // NOTE: It is of importance to Persian / Farsi resources that we do
// *not* strip format control characters in the scanner; see // *not* strip format control characters in the scanner; see
// //
...@@ -135,7 +137,7 @@ uc32 UTF16Buffer::Advance() { ...@@ -135,7 +137,7 @@ uc32 UTF16Buffer::Advance() {
uc32 next = stream_->GetNext(); uc32 next = stream_->GetNext();
return last_ = next; return last_ = next;
} else { } else {
// note: currently the following increment is necessary to avoid a // Note: currently the following increment is necessary to avoid a
// test-parser problem! // test-parser problem!
pos_++; pos_++;
return last_ = static_cast<uc32>(-1); return last_ = static_cast<uc32>(-1);
...@@ -143,13 +145,53 @@ uc32 UTF16Buffer::Advance() { ...@@ -143,13 +145,53 @@ uc32 UTF16Buffer::Advance() {
} }
void UTF16Buffer::SeekForward(int pos) { void CharacterStreamUTF16Buffer::SeekForward(int pos) {
pos_ = pos; pos_ = pos;
ASSERT(pushback_buffer()->is_empty()); ASSERT(pushback_buffer()->is_empty());
stream_->Seek(pos); stream_->Seek(pos);
} }
// TwoByteStringUTF16Buffer
// Creates a buffer that is not yet attached to any string: raw_data_ starts
// out NULL and is only given a value by Initialize().
TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() : raw_data_(NULL) {}
// Attaches this buffer to the external two-byte string |data| and rewinds the
// read position to the first character. |data| must not be a null handle.
void TwoByteStringUTF16Buffer::Initialize(Handle<ExternalTwoByteString> data) {
  ASSERT(!data.is_null());
  // Cache the character pointer and length taken from the string so that
  // Advance() can index raw_data_ directly.
  raw_data_ = data->resource()->data();
  size_ = data->length();
  pos_ = 0;
  // Keep the handle itself as well; SubString() works on data_.
  data_ = data;
}
// Returns the character at the current position and moves one step forward.
// Once the position has reached size_ the result is static_cast<uc32>(-1);
// the position is still incremented in that case.
uc32 TwoByteStringUTF16Buffer::Advance() {
  if (pos_ >= size_) {
    // Note: currently the following increment is necessary to avoid a
    // test-parser problem!
    pos_++;
    return static_cast<uc32>(-1);
  }
  return raw_data_[pos_++];
}
// Steps the read position back by one so that the character most recently
// returned by Advance() is delivered again by the next Advance() call.
// Nothing is stored: |ch| is only used to check, in debug builds, that the
// caller is pushing back a character that really is in the string.
void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {
  pos_--;
  // After the decrement, raw_data_[pos_] is the character that will be
  // re-read next (the scanner's previous current character). |ch| becomes the
  // scanner's new current character, so it sits one slot earlier. Checking
  // pos_ - 1 also keeps the assertion from indexing raw_data_[size_] when a
  // push-back directly follows an Advance() that reported the end of input.
  ASSERT(pos_ >= 1);
  ASSERT(raw_data_[pos_ - 1] == ch);
}
// Moves the read position to |pos|, so the next Advance() reads index |pos|.
// |pos| is stored as given; no range check is made here.
void TwoByteStringUTF16Buffer::SeekForward(int pos) { pos_ = pos; }
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Scanner // Scanner
...@@ -161,7 +203,15 @@ Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { ...@@ -161,7 +203,15 @@ Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {
void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
int position) { int position) {
// Initialize the source buffer. // Initialize the source buffer.
source_.Initialize(source, stream); if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
two_byte_string_buffer_.Initialize(
Handle<ExternalTwoByteString>::cast(source));
source_ = &two_byte_string_buffer_;
} else {
char_stream_buffer_.Initialize(source, stream);
source_ = &char_stream_buffer_;
}
position_ = position; position_ = position;
// Reset literals buffer // Reset literals buffer
...@@ -180,7 +230,7 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, ...@@ -180,7 +230,7 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
Handle<String> Scanner::SubString(int start, int end) { Handle<String> Scanner::SubString(int start, int end) {
return source_.SubString(start - position_, end - position_); return source_->SubString(start - position_, end - position_);
} }
...@@ -223,17 +273,6 @@ void Scanner::AddCharAdvance() { ...@@ -223,17 +273,6 @@ void Scanner::AddCharAdvance() {
} }
void Scanner::Advance() {
c0_ = source_.Advance();
}
void Scanner::PushBack(uc32 ch) {
source_.PushBack(ch);
c0_ = ch;
}
static inline bool IsByteOrderMark(uc32 c) { static inline bool IsByteOrderMark(uc32 c) {
// The Unicode value U+FFFE is guaranteed never to be assigned as a // The Unicode value U+FFFE is guaranteed never to be assigned as a
// Unicode character; this implies that in a Unicode context the // Unicode character; this implies that in a Unicode context the
...@@ -583,7 +622,7 @@ void Scanner::Scan() { ...@@ -583,7 +622,7 @@ void Scanner::Scan() {
void Scanner::SeekForward(int pos) { void Scanner::SeekForward(int pos) {
source_.SeekForward(pos - 1); source_->SeekForward(pos - 1);
Advance(); Advance();
Scan(); Scan();
} }
......
...@@ -73,24 +73,53 @@ class UTF8Buffer { ...@@ -73,24 +73,53 @@ class UTF8Buffer {
class UTF16Buffer { class UTF16Buffer {
public: public:
UTF16Buffer(); UTF16Buffer();
virtual ~UTF16Buffer() {}
virtual void PushBack(uc32 ch) = 0;
// returns a value < 0 when the buffer end is reached
virtual uc32 Advance() = 0;
virtual void SeekForward(int pos) = 0;
void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
void PushBack(uc32 ch);
uc32 Advance(); // returns a value < 0 when the buffer end is reached
uint16_t CharAt(int index);
int pos() const { return pos_; } int pos() const { return pos_; }
int size() const { return size_; } int size() const { return size_; }
Handle<String> SubString(int start, int end); Handle<String> SubString(int start, int end);
List<uc32>* pushback_buffer() { return &pushback_buffer_; }
void SeekForward(int pos);
private: protected:
Handle<String> data_; Handle<String> data_;
int pos_; int pos_;
int size_; int size_;
};
class CharacterStreamUTF16Buffer: public UTF16Buffer {
public:
CharacterStreamUTF16Buffer();
virtual ~CharacterStreamUTF16Buffer() {}
void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
virtual void PushBack(uc32 ch);
virtual uc32 Advance();
virtual void SeekForward(int pos);
private:
List<uc32> pushback_buffer_; List<uc32> pushback_buffer_;
uc32 last_; uc32 last_;
unibrow::CharacterStream* stream_; unibrow::CharacterStream* stream_;
List<uc32>* pushback_buffer() { return &pushback_buffer_; }
};
class TwoByteStringUTF16Buffer: public UTF16Buffer {
public:
TwoByteStringUTF16Buffer();
virtual ~TwoByteStringUTF16Buffer() {}
void Initialize(Handle<ExternalTwoByteString> data);
virtual void PushBack(uc32 ch);
virtual uc32 Advance();
virtual void SeekForward(int pos);
private:
const uint16_t* raw_data_;
}; };
...@@ -184,8 +213,11 @@ class Scanner { ...@@ -184,8 +213,11 @@ class Scanner {
static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
private: private:
CharacterStreamUTF16Buffer char_stream_buffer_;
TwoByteStringUTF16Buffer two_byte_string_buffer_;
// Source. // Source.
UTF16Buffer source_; UTF16Buffer* source_;
int position_; int position_;
// Buffer to hold literal values (identifiers, strings, numbers) // Buffer to hold literal values (identifiers, strings, numbers)
...@@ -219,8 +251,11 @@ class Scanner { ...@@ -219,8 +251,11 @@ class Scanner {
void TerminateLiteral(); void TerminateLiteral();
// Low-level scanning support. // Low-level scanning support.
void Advance(); void Advance() { c0_ = source_->Advance(); }
void PushBack(uc32 ch); void PushBack(uc32 ch) {
source_->PushBack(ch);
c0_ = ch;
}
bool SkipWhiteSpace(); bool SkipWhiteSpace();
Token::Value SkipSingleLineComment(); Token::Value SkipSingleLineComment();
...@@ -243,7 +278,7 @@ class Scanner { ...@@ -243,7 +278,7 @@ class Scanner {
// Return the current source position. // Return the current source position.
int source_pos() { int source_pos() {
return source_.pos() - kCharacterLookaheadBufferSize + position_; return source_->pos() - kCharacterLookaheadBufferSize + position_;
} }
// Decodes a unicode escape-sequence which is part of an identifier. // Decodes a unicode escape-sequence which is part of an identifier.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment