Commit 1c70072f authored by feng@chromium.org

Streamline the scanner for external two byte string input.

Review URL: http://codereview.chromium.org/165403

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@2703 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 33fb11c1
......@@ -1046,7 +1046,7 @@ void ObjectTemplate::SetInternalFieldCount(int value) {
ScriptData* ScriptData::PreCompile(const char* input, int length) {
unibrow::Utf8InputBuffer<> buf(input, length);
return i::PreParse(&buf, NULL);
return i::PreParse(i::Handle<i::String>(), &buf, NULL);
}
......
......@@ -266,7 +266,7 @@ Handle<JSFunction> Compiler::Compile(Handle<String> source,
if (pre_data == NULL && source_length >= FLAG_min_preparse_length) {
Access<SafeStringInputBuffer> buf(&safe_string_input_buffer);
buf->Reset(source.location());
pre_data = PreParse(buf.value(), extension);
pre_data = PreParse(source, buf.value(), extension);
}
// Create a script object describing the script to be compiled.
......
......@@ -87,8 +87,10 @@ Handle<String> Factory::NewStringFromUtf8(Vector<const char> string,
}
Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string) {
CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string), String);
Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string,
PretenureFlag pretenure) {
CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string, pretenure),
String);
}
......
......@@ -92,7 +92,8 @@ class Factory : public AllStatic {
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
static Handle<String> NewStringFromTwoByte(Vector<const uc16> str);
static Handle<String> NewStringFromTwoByte(Vector<const uc16> str,
PretenureFlag pretenure = NOT_TENURED);
// Allocates and partially initializes a TwoByte String. The characters of
// the string are uninitialized. Currently used in regexp code only, where
......
......@@ -97,7 +97,7 @@ class Parser {
// Pre-parse the program from the character stream; returns true on
// success, false if a stack-overflow happened during parsing.
bool PreParseProgram(unibrow::CharacterStream* stream);
bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream);
void ReportMessage(const char* message, Vector<const char*> args);
virtual void ReportMessageAt(Scanner::Location loc,
......@@ -1167,13 +1167,14 @@ Parser::Parser(Handle<Script> script,
}
bool Parser::PreParseProgram(unibrow::CharacterStream* stream) {
bool Parser::PreParseProgram(Handle<String> source,
unibrow::CharacterStream* stream) {
HistogramTimerScope timer(&Counters::pre_parse);
StackGuard guard;
AssertNoZoneAllocation assert_no_zone_allocation;
AssertNoAllocation assert_no_allocation;
NoHandleAllocation no_handle_allocation;
scanner_.Init(Handle<String>(), stream, 0);
scanner_.Init(source, stream, 0);
ASSERT(target_stack_ == NULL);
mode_ = PARSE_EAGERLY;
DummyScope top_scope;
......@@ -4593,7 +4594,8 @@ unsigned* ScriptDataImpl::Data() {
}
ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
ScriptDataImpl* PreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) {
Handle<Script> no_script;
bool allow_natives_syntax =
......@@ -4601,7 +4603,7 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
FLAG_allow_natives_syntax ||
Bootstrapper::IsActive();
PreParser parser(no_script, allow_natives_syntax, extension);
if (!parser.PreParseProgram(stream)) return NULL;
if (!parser.PreParseProgram(source, stream)) return NULL;
// The list owns the backing store so we need to clone the vector.
// That way, the result will be exactly the right size rather than
// the expected 50% too large.
......
......@@ -143,7 +143,8 @@ FunctionLiteral* MakeAST(bool compile_in_global_context,
ScriptDataImpl* pre_data);
ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
ScriptDataImpl* PreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension);
......
......@@ -92,33 +92,35 @@ void UTF8Buffer::AddCharSlow(uc32 c) {
UTF16Buffer::UTF16Buffer()
: pos_(0),
pushback_buffer_(0),
last_(0),
stream_(NULL) { }
: pos_(0), size_(0) { }
void UTF16Buffer::Initialize(Handle<String> data,
unibrow::CharacterStream* input) {
data_ = data;
pos_ = 0;
stream_ = input;
Handle<String> UTF16Buffer::SubString(int start, int end) {
return internal::SubString(data_, start, end);
}
Handle<String> UTF16Buffer::SubString(int start, int end) {
return internal::SubString(data_, start, end);
// CharacterStreamUTF16Buffer
// Constructs a buffer that is not yet attached to any character stream:
// stream_ starts out NULL and last_ starts out 0. Initialize() attaches the
// actual input.
CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
: pushback_buffer_(0), last_(0), stream_(NULL) { }
// Attaches this buffer to its input and rewinds the read position.
//   data:  handle to the source string; stored in data_.
//   input: character stream that supplies the characters; stored in stream_.
void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
                                            unibrow::CharacterStream* input) {
  // Start reading from the beginning of the new input.
  pos_ = 0;
  stream_ = input;
  data_ = data;
}
void UTF16Buffer::PushBack(uc32 ch) {
// Un-reads one character: the character most recently handed out (last_) is
// saved on the pushback list and ch takes its place as the last character.
void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
  // The read position moves back by one character.
  pos_--;
  // Save the old last_ before it is overwritten below.
  pushback_buffer_.Add(last_);
  last_ = ch;
}
uc32 UTF16Buffer::Advance() {
uc32 CharacterStreamUTF16Buffer::Advance() {
// NOTE: It is of importance to Persian / Farsi resources that we do
// *not* strip format control characters in the scanner; see
//
......@@ -135,7 +137,7 @@ uc32 UTF16Buffer::Advance() {
uc32 next = stream_->GetNext();
return last_ = next;
} else {
// note: currently the following increment is necessary to avoid a
// Note: currently the following increment is necessary to avoid a
// test-parser problem!
pos_++;
return last_ = static_cast<uc32>(-1);
......@@ -143,13 +145,53 @@ uc32 UTF16Buffer::Advance() {
}
void UTF16Buffer::SeekForward(int pos) {
// Moves the read position to pos and seeks the underlying stream to the same
// position.
void CharacterStreamUTF16Buffer::SeekForward(int pos) {
  // The code asserts that nothing is pending on the pushback list when a
  // seek is requested.
  ASSERT(pushback_buffer()->is_empty());
  pos_ = pos;
  stream_->Seek(pos);
}
// TwoByteStringUTF16Buffer
// Constructs a buffer with no character data attached (raw_data_ is NULL).
// Initialize() must supply the string before the buffer is read.
TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer()
: raw_data_(NULL) { }
// Attaches this buffer to an external two-byte string and rewinds the read
// position.
//   data: handle to the string; must not be a null handle.
void TwoByteStringUTF16Buffer::Initialize(
    Handle<ExternalTwoByteString> data) {
  ASSERT(!data.is_null());
  // Cache the character pointer and the length so that Advance() can index
  // raw_data_ directly.
  raw_data_ = data->resource()->data();
  size_ = data->length();
  pos_ = 0;
  data_ = data;
}
// Returns the character at the current position and steps past it. Once the
// end of the data has been reached, returns static_cast<uc32>(-1) instead.
uc32 TwoByteStringUTF16Buffer::Advance() {
  if (pos_ >= size_) {
    // Note: currently the following increment is necessary to avoid a
    // test-parser problem!
    pos_++;
    return static_cast<uc32>(-1);
  }
  return raw_data_[pos_++];
}
// Un-reads the most recently returned character so that ch becomes the
// current character again.
//
// Advance() leaves pos_ one past the index of the character it returned.
// After the decrement below, raw_data_[pos_] is therefore the character that
// was just un-read (it will be returned by the next Advance()), and the
// character that is current again -- ch -- sits at index pos_ - 1. This
// mirrors CharacterStreamUTF16Buffer::PushBack, which saves the old last
// character and makes ch the new one.
//
// The check must therefore look at pos_ - 1. Comparing against
// raw_data_[pos_] tests the wrong character, and reads one element past the
// end of the data when the un-read value was the end-of-input marker.
void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {
  pos_--;
  // ch must lie inside the data, and the un-read position may be at most the
  // end-of-input position.
  ASSERT(pos_ >= 1 && pos_ <= size_);
  ASSERT(raw_data_[pos_ - 1] == ch);
}
// Moves the read position to pos. The characters are indexed directly
// through raw_data_, so only the position needs to change; pos is not
// range-checked here.
void TwoByteStringUTF16Buffer::SeekForward(int pos) {
pos_ = pos;
}
// ----------------------------------------------------------------------------
// Scanner
......@@ -161,7 +203,15 @@ Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {
void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
int position) {
// Initialize the source buffer.
source_.Initialize(source, stream);
if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
two_byte_string_buffer_.Initialize(
Handle<ExternalTwoByteString>::cast(source));
source_ = &two_byte_string_buffer_;
} else {
char_stream_buffer_.Initialize(source, stream);
source_ = &char_stream_buffer_;
}
position_ = position;
// Reset literals buffer
......@@ -180,7 +230,7 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
Handle<String> Scanner::SubString(int start, int end) {
return source_.SubString(start - position_, end - position_);
return source_->SubString(start - position_, end - position_);
}
......@@ -223,17 +273,6 @@ void Scanner::AddCharAdvance() {
}
void Scanner::Advance() {
c0_ = source_.Advance();
}
void Scanner::PushBack(uc32 ch) {
source_.PushBack(ch);
c0_ = ch;
}
static inline bool IsByteOrderMark(uc32 c) {
// The Unicode value U+FFFE is guaranteed never to be assigned as a
// Unicode character; this implies that in a Unicode context the
......@@ -583,7 +622,7 @@ void Scanner::Scan() {
void Scanner::SeekForward(int pos) {
source_.SeekForward(pos - 1);
source_->SeekForward(pos - 1);
Advance();
Scan();
}
......
......@@ -73,24 +73,53 @@ class UTF8Buffer {
class UTF16Buffer {
public:
UTF16Buffer();
virtual ~UTF16Buffer() {}
virtual void PushBack(uc32 ch) = 0;
// returns a value < 0 when the buffer end is reached
virtual uc32 Advance() = 0;
virtual void SeekForward(int pos) = 0;
void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
void PushBack(uc32 ch);
uc32 Advance(); // returns a value < 0 when the buffer end is reached
uint16_t CharAt(int index);
int pos() const { return pos_; }
int size() const { return size_; }
Handle<String> SubString(int start, int end);
List<uc32>* pushback_buffer() { return &pushback_buffer_; }
void SeekForward(int pos);
private:
protected:
Handle<String> data_;
int pos_;
int size_;
};
// UTF16Buffer implementation that pulls its characters from a
// unibrow::CharacterStream and keeps a list of pushed-back characters.
class CharacterStreamUTF16Buffer: public UTF16Buffer {
public:
CharacterStreamUTF16Buffer();
virtual ~CharacterStreamUTF16Buffer() {}
// Stores the source string handle and the stream to read from, and resets
// the read position to 0.
void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
// Saves the last returned character on the pushback list, makes ch the last
// character and moves the position back by one.
virtual void PushBack(uc32 ch);
// Returns the next character.
virtual uc32 Advance();
// Sets the position to pos and seeks the stream there; asserts that the
// pushback list is empty.
virtual void SeekForward(int pos);
private:
// Characters that have been pushed back.
List<uc32> pushback_buffer_;
// The character most recently returned by Advance() or installed by
// PushBack().
uc32 last_;
// Stream that supplies the characters; NULL until Initialize() is called.
unibrow::CharacterStream* stream_;
List<uc32>* pushback_buffer() { return &pushback_buffer_; }
};
// UTF16Buffer implementation that reads characters directly from the
// character data of an external two-byte string, without going through a
// character stream.
class TwoByteStringUTF16Buffer: public UTF16Buffer {
public:
TwoByteStringUTF16Buffer();
virtual ~TwoByteStringUTF16Buffer() {}
// Stores the string handle, caches its character pointer and length, and
// resets the read position to 0. data must not be a null handle.
void Initialize(Handle<ExternalTwoByteString> data);
// Moves the position back by one character.
virtual void PushBack(uc32 ch);
// Returns the character at the current position and steps past it; returns
// static_cast<uc32>(-1) once the end of the data has been reached.
virtual uc32 Advance();
// Sets the position to pos.
virtual void SeekForward(int pos);
private:
// Pointer to the string's two-byte characters, taken from the string's
// resource; NULL until Initialize() is called.
const uint16_t* raw_data_;
};
......@@ -184,8 +213,11 @@ class Scanner {
static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
private:
CharacterStreamUTF16Buffer char_stream_buffer_;
TwoByteStringUTF16Buffer two_byte_string_buffer_;
// Source.
UTF16Buffer source_;
UTF16Buffer* source_;
int position_;
// Buffer to hold literal values (identifiers, strings, numbers)
......@@ -219,8 +251,11 @@ class Scanner {
void TerminateLiteral();
// Low-level scanning support.
void Advance();
void PushBack(uc32 ch);
void Advance() { c0_ = source_->Advance(); }
void PushBack(uc32 ch) {
source_->PushBack(ch);
c0_ = ch;
}
bool SkipWhiteSpace();
Token::Value SkipSingleLineComment();
......@@ -243,7 +278,7 @@ class Scanner {
// Return the current source position.
int source_pos() {
return source_.pos() - kCharacterLookaheadBufferSize + position_;
return source_->pos() - kCharacterLookaheadBufferSize + position_;
}
// Decodes a unicode escape-sequence which is part of an identifier.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment