Commit cf0d7455 authored by lrn@chromium.org

Remove unnecessary buffer doubling and content copying.

Review URL: http://codereview.chromium.org/377006


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3246 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent be8e9c0b
...@@ -42,35 +42,27 @@ unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; ...@@ -42,35 +42,27 @@ unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// UTF8Buffer // UTF8Buffer
UTF8Buffer::UTF8Buffer() { UTF8Buffer::UTF8Buffer() :
static const int kInitialCapacity = 1 * KB; data_(NULL), limit_(NULL) {
data_ = NewArray<char>(kInitialCapacity);
limit_ = ComputeLimit(data_, kInitialCapacity);
Reset();
ASSERT(Capacity() == kInitialCapacity && pos() == 0);
} }
UTF8Buffer::~UTF8Buffer() { UTF8Buffer::~UTF8Buffer() {
DeleteArray(data_); DeleteArray(data_);
} }
void UTF8Buffer::AddCharSlow(uc32 c) { void UTF8Buffer::AddCharSlow(uc32 c) {
static const int kCapacityGrowthLimit = 1 * MB; static const int kCapacityGrowthLimit = 1 * MB;
if (cursor_ > limit_) { if (cursor_ > limit_) {
int old_capacity = Capacity(); int old_capacity = Capacity();
int old_position = pos(); int old_position = pos();
int new_capacity = int new_capacity = Min(old_capacity * 3, old_capacity
Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit); + kCapacityGrowthLimit);
char* new_data = NewArray<char>(new_capacity); char* new_data = NewArray<char> (new_capacity);
memcpy(new_data, data_, old_position); memcpy(new_data, data_, old_position);
DeleteArray(data_); DeleteArray(data_);
data_ = new_data; data_ = new_data;
...@@ -78,32 +70,30 @@ void UTF8Buffer::AddCharSlow(uc32 c) { ...@@ -78,32 +70,30 @@ void UTF8Buffer::AddCharSlow(uc32 c) {
limit_ = ComputeLimit(new_data, new_capacity); limit_ = ComputeLimit(new_data, new_capacity);
ASSERT(Capacity() == new_capacity && pos() == old_position); ASSERT(Capacity() == new_capacity && pos() == old_position);
} }
if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { if (static_cast<unsigned> (c) <= unibrow::Utf8::kMaxOneByteChar) {
*cursor_++ = c; // Common case: 7-bit ASCII. *cursor_++ = c; // Common case: 7-bit ASCII.
} else { } else {
cursor_ += unibrow::Utf8::Encode(cursor_, c); cursor_ += unibrow::Utf8::Encode(cursor_, c);
} }
ASSERT(pos() <= Capacity()); ASSERT(pos() <= Capacity());
} }
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// UTF16Buffer // UTF16Buffer
UTF16Buffer::UTF16Buffer() UTF16Buffer::UTF16Buffer() :
: pos_(0), size_(0) { } pos_(0), size_(0) {
}
Handle<String> UTF16Buffer::SubString(int start, int end) { Handle<String> UTF16Buffer::SubString(int start, int end) {
return internal::SubString(data_, start, end); return internal::SubString(data_, start, end);
} }
// CharacterStreamUTF16Buffer // CharacterStreamUTF16Buffer
CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() :
: pushback_buffer_(0), last_(0), stream_(NULL) { } pushback_buffer_(0), last_(0), stream_(NULL) {
}
void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
unibrow::CharacterStream* input) { unibrow::CharacterStream* input) {
...@@ -112,14 +102,12 @@ void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, ...@@ -112,14 +102,12 @@ void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
stream_ = input; stream_ = input;
} }
void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
pushback_buffer()->Add(last_); pushback_buffer()->Add(last_);
last_ = ch; last_ = ch;
pos_--; pos_--;
} }
uc32 CharacterStreamUTF16Buffer::Advance() { uc32 CharacterStreamUTF16Buffer::Advance() {
// NOTE: It is of importance to Persian / Farsi resources that we do // NOTE: It is of importance to Persian / Farsi resources that we do
// *not* strip format control characters in the scanner; see // *not* strip format control characters in the scanner; see
...@@ -140,25 +128,22 @@ uc32 CharacterStreamUTF16Buffer::Advance() { ...@@ -140,25 +128,22 @@ uc32 CharacterStreamUTF16Buffer::Advance() {
// Note: currently the following increment is necessary to avoid a // Note: currently the following increment is necessary to avoid a
// test-parser problem! // test-parser problem!
pos_++; pos_++;
return last_ = static_cast<uc32>(-1); return last_ = static_cast<uc32> (-1);
} }
} }
void CharacterStreamUTF16Buffer::SeekForward(int pos) { void CharacterStreamUTF16Buffer::SeekForward(int pos) {
pos_ = pos; pos_ = pos;
ASSERT(pushback_buffer()->is_empty()); ASSERT(pushback_buffer()->is_empty());
stream_->Seek(pos); stream_->Seek(pos);
} }
// TwoByteStringUTF16Buffer // TwoByteStringUTF16Buffer
TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() :
: raw_data_(NULL) { } raw_data_(NULL) {
}
void TwoByteStringUTF16Buffer::Initialize( void TwoByteStringUTF16Buffer::Initialize(Handle<ExternalTwoByteString> data) {
Handle<ExternalTwoByteString> data) {
ASSERT(!data.is_null()); ASSERT(!data.is_null());
data_ = data; data_ = data;
...@@ -168,7 +153,6 @@ void TwoByteStringUTF16Buffer::Initialize( ...@@ -168,7 +153,6 @@ void TwoByteStringUTF16Buffer::Initialize(
size_ = data->length(); size_ = data->length();
} }
uc32 TwoByteStringUTF16Buffer::Advance() { uc32 TwoByteStringUTF16Buffer::Advance() {
if (pos_ < size_) { if (pos_ < size_) {
return raw_data_[pos_++]; return raw_data_[pos_++];
...@@ -176,50 +160,35 @@ uc32 TwoByteStringUTF16Buffer::Advance() { ...@@ -176,50 +160,35 @@ uc32 TwoByteStringUTF16Buffer::Advance() {
// note: currently the following increment is necessary to avoid a // note: currently the following increment is necessary to avoid a
// test-parser problem! // test-parser problem!
pos_++; pos_++;
return static_cast<uc32>(-1); return static_cast<uc32> (-1);
} }
} }
void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {
pos_--; pos_--;
ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
} }
void TwoByteStringUTF16Buffer::SeekForward(int pos) { void TwoByteStringUTF16Buffer::SeekForward(int pos) {
pos_ = pos; pos_ = pos;
} }
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Keyword Matcher // Keyword Matcher
KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { { "break",
{ "break", KEYWORD_PREFIX, Token::BREAK }, KEYWORD_PREFIX, Token::BREAK }, { NULL, C, Token::ILLEGAL }, { NULL, D,
{ NULL, C, Token::ILLEGAL }, Token::ILLEGAL }, { "else", KEYWORD_PREFIX, Token::ELSE }, { NULL, F,
{ NULL, D, Token::ILLEGAL }, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL,
{ "else", KEYWORD_PREFIX, Token::ELSE }, UNMATCHABLE, Token::ILLEGAL }, { NULL, I, Token::ILLEGAL }, { NULL,
{ NULL, F, Token::ILLEGAL }, UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, {
{ NULL, UNMATCHABLE, Token::ILLEGAL }, NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL },
{ NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, N, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL,
{ NULL, I, Token::ILLEGAL }, UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, {
{ NULL, UNMATCHABLE, Token::ILLEGAL }, "return", KEYWORD_PREFIX, Token::RETURN }, { "switch", KEYWORD_PREFIX,
{ NULL, UNMATCHABLE, Token::ILLEGAL }, Token::SWITCH }, { NULL, T, Token::ILLEGAL }, { NULL, UNMATCHABLE,
{ NULL, UNMATCHABLE, Token::ILLEGAL }, Token::ILLEGAL }, { NULL, V, Token::ILLEGAL }, { NULL, W,
{ NULL, UNMATCHABLE, Token::ILLEGAL }, Token::ILLEGAL } };
{ NULL, N, Token::ILLEGAL },
{ NULL, UNMATCHABLE, Token::ILLEGAL },
{ NULL, UNMATCHABLE, Token::ILLEGAL },
{ NULL, UNMATCHABLE, Token::ILLEGAL },
{ "return", KEYWORD_PREFIX, Token::RETURN },
{ "switch", KEYWORD_PREFIX, Token::SWITCH },
{ NULL, T, Token::ILLEGAL },
{ NULL, UNMATCHABLE, Token::ILLEGAL },
{ NULL, V, Token::ILLEGAL },
{ NULL, W, Token::ILLEGAL }
};
void KeywordMatcher::Step(uc32 input) { void KeywordMatcher::Step(uc32 input) {
switch (state_) { switch (state_) {
...@@ -253,38 +222,56 @@ void KeywordMatcher::Step(uc32 input) { ...@@ -253,38 +222,56 @@ void KeywordMatcher::Step(uc32 input) {
token_ = Token::IDENTIFIER; token_ = Token::IDENTIFIER;
break; break;
case C: case C:
if (MatchState(input, 'a', CA)) return; if (MatchState(input, 'a', CA))
if (MatchState(input, 'o', CO)) return; return;
if (MatchState(input, 'o', CO))
return;
break; break;
case CA: case CA:
if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; if (MatchKeywordStart(input, "case", 2, Token::CASE))
if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; return;
if (MatchKeywordStart(input, "catch", 2, Token::CATCH))
return;
break; break;
case CO: case CO:
if (MatchState(input, 'n', CON)) return; if (MatchState(input, 'n', CON))
return;
break; break;
case CON: case CON:
if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; if (MatchKeywordStart(input, "const", 3, Token::CONST))
if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; return;
if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE))
return;
break; break;
case D: case D:
if (MatchState(input, 'e', DE)) return; if (MatchState(input, 'e', DE))
if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; return;
if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO))
return;
break; break;
case DE: case DE:
if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER))
if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; return;
if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; if (MatchKeywordStart(input, "default", 2, Token::DEFAULT))
return;
if (MatchKeywordStart(input, "delete", 2, Token::DELETE))
return;
break; break;
case F: case F:
if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL))
if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; return;
if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; if (MatchKeywordStart(input, "finally", 1, Token::FINALLY))
if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; return;
if (MatchKeywordStart(input, "for", 1, Token::FOR))
return;
if (MatchKeywordStart(input, "function", 1, Token::FUNCTION))
return;
break; break;
case I: case I:
if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF))
if (MatchKeyword(input, 'n', IN, Token::IN)) return; return;
if (MatchKeyword(input, 'n', IN, Token::IN))
return;
break; break;
case IN: case IN:
token_ = Token::IDENTIFIER; token_ = Token::IDENTIFIER;
...@@ -293,30 +280,44 @@ void KeywordMatcher::Step(uc32 input) { ...@@ -293,30 +280,44 @@ void KeywordMatcher::Step(uc32 input) {
} }
break; break;
case N: case N:
if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return; if (MatchKeywordStart(input, "native", 1, Token::NATIVE))
if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; return;
if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; if (MatchKeywordStart(input, "new", 1, Token::NEW))
return;
if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL))
return;
break; break;
case T: case T:
if (MatchState(input, 'h', TH)) return; if (MatchState(input, 'h', TH))
if (MatchState(input, 'r', TR)) return; return;
if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; if (MatchState(input, 'r', TR))
return;
if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF))
return;
break; break;
case TH: case TH:
if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; if (MatchKeywordStart(input, "this", 2, Token::THIS))
if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; return;
if (MatchKeywordStart(input, "throw", 2, Token::THROW))
return;
break; break;
case TR: case TR:
if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return; if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL))
if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return; return;
if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY))
return;
break; break;
case V: case V:
if (MatchKeywordStart(input, "var", 1, Token::VAR)) return; if (MatchKeywordStart(input, "var", 1, Token::VAR))
if (MatchKeywordStart(input, "void", 1, Token::VOID)) return; return;
if (MatchKeywordStart(input, "void", 1, Token::VOID))
return;
break; break;
case W: case W:
if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return; if (MatchKeywordStart(input, "while", 1, Token::WHILE))
if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; return;
if (MatchKeywordStart(input, "with", 1, Token::WITH))
return;
break; break;
default: default:
UNREACHABLE(); UNREACHABLE();
...@@ -325,19 +326,20 @@ void KeywordMatcher::Step(uc32 input) { ...@@ -325,19 +326,20 @@ void KeywordMatcher::Step(uc32 input) {
state_ = UNMATCHABLE; state_ = UNMATCHABLE;
} }
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Scanner // Scanner
Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } Scanner::Scanner(bool pre) :
stack_overflow_(false), is_pre_parsing_(pre) {
}
void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
int position) { int position) {
// Initialize the source buffer. // Initialize the source buffer.
if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
two_byte_string_buffer_.Initialize( two_byte_string_buffer_.Initialize(
Handle<ExternalTwoByteString>::cast(source)); Handle<ExternalTwoByteString>::cast(
source));
source_ = &two_byte_string_buffer_; source_ = &two_byte_string_buffer_;
} else { } else {
char_stream_buffer_.Initialize(source, stream); char_stream_buffer_.Initialize(source, stream);
...@@ -346,9 +348,6 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, ...@@ -346,9 +348,6 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
position_ = position; position_ = position;
// Reset literals buffer
literals_.Reset();
// Set c0_ (one character ahead) // Set c0_ (one character ahead)
ASSERT(kCharacterLookaheadBufferSize == 1); ASSERT(kCharacterLookaheadBufferSize == 1);
Advance(); Advance();
...@@ -360,12 +359,10 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, ...@@ -360,12 +359,10 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
Scan(); Scan();
} }
Handle<String> Scanner::SubString(int start, int end) { Handle<String> Scanner::SubString(int start, int end) {
return source_->SubString(start - position_, end - position_); return source_->SubString(start - position_, end - position_);
} }
Token::Value Scanner::Next() { Token::Value Scanner::Next() {
// BUG 1215673: Find a thread safe way to set a stack limit in // BUG 1215673: Find a thread safe way to set a stack limit in
// pre-parse mode. Otherwise, we cannot safely pre-parse from other // pre-parse mode. Otherwise, we cannot safely pre-parse from other
...@@ -376,35 +373,37 @@ Token::Value Scanner::Next() { ...@@ -376,35 +373,37 @@ Token::Value Scanner::Next() {
if (check.HasOverflowed()) { if (check.HasOverflowed()) {
stack_overflow_ = true; stack_overflow_ = true;
next_.token = Token::ILLEGAL; next_.token = Token::ILLEGAL;
next_.literal_buffer = NULL;
} else { } else {
Scan(); Scan();
} }
return current_.token; return current_.token;
} }
void Scanner::StartLiteral() { void Scanner::StartLiteral() {
next_.literal_pos = literals_.pos(); // Use the first buffer unless it's currently in use by the current_ token.
// In most cases we won't have two literals/identifiers in a row, so
// the second buffer won't be used very often and is unlikely to grow much.
UTF8Buffer* free_buffer =
(current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_
: &literal_buffer_2_;
next_.literal_buffer = free_buffer;
free_buffer->Reset();
} }
void Scanner::AddChar(uc32 c) { void Scanner::AddChar(uc32 c) {
literals_.AddChar(c); next_.literal_buffer->AddChar(c);
} }
void Scanner::TerminateLiteral() { void Scanner::TerminateLiteral() {
next_.literal_end = literals_.pos();
AddChar(0); AddChar(0);
} }
void Scanner::AddCharAdvance() { void Scanner::AddCharAdvance() {
AddChar(c0_); AddChar(c0_);
Advance(); Advance();
} }
static inline bool IsByteOrderMark(uc32 c) { static inline bool IsByteOrderMark(uc32 c) {
// The Unicode value U+FFFE is guaranteed never to be assigned as a // The Unicode value U+FFFE is guaranteed never to be assigned as a
// Unicode character; this implies that in a Unicode context the // Unicode character; this implies that in a Unicode context the
...@@ -416,7 +415,6 @@ static inline bool IsByteOrderMark(uc32 c) { ...@@ -416,7 +415,6 @@ static inline bool IsByteOrderMark(uc32 c) {
return c == 0xFEFF || c == 0xFFFE; return c == 0xFEFF || c == 0xFFFE;
} }
bool Scanner::SkipWhiteSpace() { bool Scanner::SkipWhiteSpace() {
int start_position = source_pos(); int start_position = source_pos();
...@@ -447,16 +445,15 @@ bool Scanner::SkipWhiteSpace() { ...@@ -447,16 +445,15 @@ bool Scanner::SkipWhiteSpace() {
// Continue skipping white space after the comment. // Continue skipping white space after the comment.
continue; continue;
} }
PushBack('-'); // undo Advance() PushBack('-'); // undo Advance()
} }
PushBack('-'); // undo Advance() PushBack('-'); // undo Advance()
} }
// Return whether or not we skipped any characters. // Return whether or not we skipped any characters.
return source_pos() != start_position; return source_pos() != start_position;
} }
} }
Token::Value Scanner::SkipSingleLineComment() { Token::Value Scanner::SkipSingleLineComment() {
Advance(); Advance();
...@@ -472,7 +469,6 @@ Token::Value Scanner::SkipSingleLineComment() { ...@@ -472,7 +469,6 @@ Token::Value Scanner::SkipSingleLineComment() {
return Token::WHITESPACE; return Token::WHITESPACE;
} }
Token::Value Scanner::SkipMultiLineComment() { Token::Value Scanner::SkipMultiLineComment() {
ASSERT(c0_ == '*'); ASSERT(c0_ == '*');
Advance(); Advance();
...@@ -497,22 +493,21 @@ Token::Value Scanner::SkipMultiLineComment() { ...@@ -497,22 +493,21 @@ Token::Value Scanner::SkipMultiLineComment() {
return Token::ILLEGAL; return Token::ILLEGAL;
} }
Token::Value Scanner::ScanHtmlComment() { Token::Value Scanner::ScanHtmlComment() {
// Check for <!-- comments. // Check for <!-- comments.
ASSERT(c0_ == '!'); ASSERT(c0_ == '!');
Advance(); Advance();
if (c0_ == '-') { if (c0_ == '-') {
Advance(); Advance();
if (c0_ == '-') return SkipSingleLineComment(); if (c0_ == '-')
PushBack('-'); // undo Advance() return SkipSingleLineComment();
PushBack('-'); // undo Advance()
} }
PushBack('!'); // undo Advance() PushBack('!'); // undo Advance()
ASSERT(c0_ == '!'); ASSERT(c0_ == '!');
return Token::LT; return Token::LT;
} }
void Scanner::Scan() { void Scanner::Scan() {
Token::Value token; Token::Value token;
has_line_terminator_before_next_ = false; has_line_terminator_before_next_ = false;
...@@ -533,7 +528,8 @@ void Scanner::Scan() { ...@@ -533,7 +528,8 @@ void Scanner::Scan() {
token = Token::WHITESPACE; token = Token::WHITESPACE;
break; break;
case '"': case '\'': case '"':
case '\'':
token = ScanString(); token = ScanString();
break; break;
...@@ -752,16 +748,14 @@ void Scanner::Scan() { ...@@ -752,16 +748,14 @@ void Scanner::Scan() {
next_.token = token; next_.token = token;
} }
void Scanner::SeekForward(int pos) { void Scanner::SeekForward(int pos) {
source_->SeekForward(pos - 1); source_->SeekForward(pos - 1);
Advance(); Advance();
Scan(); Scan();
} }
uc32 Scanner::ScanHexEscape(uc32 c, int length) { uc32 Scanner::ScanHexEscape(uc32 c, int length) {
ASSERT(length <= 4); // prevent overflow ASSERT(length <= 4); // prevent overflow
uc32 digits[4]; uc32 digits[4];
uc32 x = 0; uc32 x = 0;
...@@ -774,7 +768,7 @@ uc32 Scanner::ScanHexEscape(uc32 c, int length) { ...@@ -774,7 +768,7 @@ uc32 Scanner::ScanHexEscape(uc32 c, int length) {
// non-escaped version of the original character. // non-escaped version of the original character.
// Push back digits read, except the last one (in c0_). // Push back digits read, except the last one (in c0_).
for (int j = i-1; j >= 0; j--) { for (int j = i - 1; j >= 0; j--) {
PushBack(digits[j]); PushBack(digits[j]);
} }
// Notice: No handling of error - treat it as "\u"->"u". // Notice: No handling of error - treat it as "\u"->"u".
...@@ -787,23 +781,23 @@ uc32 Scanner::ScanHexEscape(uc32 c, int length) { ...@@ -787,23 +781,23 @@ uc32 Scanner::ScanHexEscape(uc32 c, int length) {
return x; return x;
} }
// Octal escapes of the forms '\0xx' and '\xxx' are not a part of // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
// ECMA-262. Other JS VMs support them. // ECMA-262. Other JS VMs support them.
uc32 Scanner::ScanOctalEscape(uc32 c, int length) { uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
uc32 x = c - '0'; uc32 x = c - '0';
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
int d = c0_ - '0'; int d = c0_ - '0';
if (d < 0 || d > 7) break; if (d < 0 || d > 7)
break;
int nx = x * 8 + d; int nx = x * 8 + d;
if (nx >= 256) break; if (nx >= 256)
break;
x = nx; x = nx;
Advance(); Advance();
} }
return x; return x;
} }
void Scanner::ScanEscape() { void Scanner::ScanEscape() {
uc32 c = c0_; uc32 c = c0_;
Advance(); Advance();
...@@ -811,32 +805,53 @@ void Scanner::ScanEscape() { ...@@ -811,32 +805,53 @@ void Scanner::ScanEscape() {
// Skip escaped newlines. // Skip escaped newlines.
if (kIsLineTerminator.get(c)) { if (kIsLineTerminator.get(c)) {
// Allow CR+LF newlines in multiline string literals. // Allow CR+LF newlines in multiline string literals.
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); if (IsCarriageReturn(c) && IsLineFeed(c0_))
Advance();
// Allow LF+CR newlines in multiline string literals. // Allow LF+CR newlines in multiline string literals.
if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); if (IsLineFeed(c) && IsCarriageReturn(c0_))
Advance();
return; return;
} }
switch (c) { switch (c) {
case '\'': // fall through case '\'': // fall through
case '"' : // fall through case '"': // fall through
case '\\': break; case '\\':
case 'b' : c = '\b'; break; break;
case 'f' : c = '\f'; break; case 'b':
case 'n' : c = '\n'; break; c = '\b';
case 'r' : c = '\r'; break; break;
case 't' : c = '\t'; break; case 'f':
case 'u' : c = ScanHexEscape(c, 4); break; c = '\f';
case 'v' : c = '\v'; break; break;
case 'x' : c = ScanHexEscape(c, 2); break; case 'n':
case '0' : // fall through c = '\n';
case '1' : // fall through break;
case '2' : // fall through case 'r':
case '3' : // fall through c = '\r';
case '4' : // fall through break;
case '5' : // fall through case 't':
case '6' : // fall through c = '\t';
case '7' : c = ScanOctalEscape(c, 2); break; break;
case 'u':
c = ScanHexEscape(c, 4);
break;
case 'v':
c = '\v';
break;
case 'x':
c = ScanHexEscape(c, 2);
break;
case '0': // fall through
case '1': // fall through
case '2': // fall through
case '3': // fall through
case '4': // fall through
case '5': // fall through
case '6': // fall through
case '7':
c = ScanOctalEscape(c, 2);
break;
} }
// According to ECMA-262, 3rd, 7.8.4 (p 18ff) these // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
...@@ -845,17 +860,17 @@ void Scanner::ScanEscape() { ...@@ -845,17 +860,17 @@ void Scanner::ScanEscape() {
AddChar(c); AddChar(c);
} }
Token::Value Scanner::ScanString() { Token::Value Scanner::ScanString() {
uc32 quote = c0_; uc32 quote = c0_;
Advance(); // consume quote Advance(); // consume quote
StartLiteral(); StartLiteral();
while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
uc32 c = c0_; uc32 c = c0_;
Advance(); Advance();
if (c == '\\') { if (c == '\\') {
if (c0_ < 0) return Token::ILLEGAL; if (c0_ < 0)
return Token::ILLEGAL;
ScanEscape(); ScanEscape();
} else { } else {
AddChar(c); AddChar(c);
...@@ -866,17 +881,15 @@ Token::Value Scanner::ScanString() { ...@@ -866,17 +881,15 @@ Token::Value Scanner::ScanString() {
} }
TerminateLiteral(); TerminateLiteral();
Advance(); // consume quote Advance(); // consume quote
return Token::STRING; return Token::STRING;
} }
Token::Value Scanner::Select(Token::Value tok) { Token::Value Scanner::Select(Token::Value tok) {
Advance(); Advance();
return tok; return tok;
} }
Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) {
Advance(); Advance();
if (c0_ == next) { if (c0_ == next) {
...@@ -887,24 +900,24 @@ Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { ...@@ -887,24 +900,24 @@ Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) {
} }
} }
// Scans a (possibly empty) run of decimal digits, adding each one to the current literal. // Scans a (possibly empty) run of decimal digits, adding each one to the current literal.
void Scanner::ScanDecimalDigits() { void Scanner::ScanDecimalDigits() {
while (IsDecimalDigit(c0_)) while (IsDecimalDigit(c0_))
AddCharAdvance(); AddCharAdvance();
} }
Token::Value Scanner::ScanNumber(bool seen_period) { Token::Value Scanner::ScanNumber(bool seen_period) {
ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; enum {
DECIMAL, HEX, OCTAL
} kind = DECIMAL;
StartLiteral(); StartLiteral();
if (seen_period) { if (seen_period) {
// we have already seen a decimal point of the float // we have already seen a decimal point of the float
AddChar('.'); AddChar('.');
ScanDecimalDigits(); // we know we have at least one digit ScanDecimalDigits(); // we know we have at least one digit
} else { } else {
// if the first character is '0' we must check for octals and hex // if the first character is '0' we must check for octals and hex
...@@ -930,7 +943,8 @@ Token::Value Scanner::ScanNumber(bool seen_period) { ...@@ -930,7 +943,8 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
kind = DECIMAL; kind = DECIMAL;
break; break;
} }
if (c0_ < '0' || '7' < c0_) break; if (c0_ < '0' || '7' < c0_)
break;
AddCharAdvance(); AddCharAdvance();
} }
} }
...@@ -938,18 +952,19 @@ Token::Value Scanner::ScanNumber(bool seen_period) { ...@@ -938,18 +952,19 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
// Parse decimal digits and allow trailing fractional part. // Parse decimal digits and allow trailing fractional part.
if (kind == DECIMAL) { if (kind == DECIMAL) {
ScanDecimalDigits(); // optional ScanDecimalDigits(); // optional
if (c0_ == '.') { if (c0_ == '.') {
AddCharAdvance(); AddCharAdvance();
ScanDecimalDigits(); // optional ScanDecimalDigits(); // optional
} }
} }
} }
// scan exponent, if any // scan exponent, if any
if (c0_ == 'e' || c0_ == 'E') { if (c0_ == 'e' || c0_ == 'E') {
ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed if (kind == OCTAL)
return Token::ILLEGAL; // no exponent for octals allowed
// scan exponent // scan exponent
AddCharAdvance(); AddCharAdvance();
if (c0_ == '+' || c0_ == '-') if (c0_ == '+' || c0_ == '-')
...@@ -971,19 +986,19 @@ Token::Value Scanner::ScanNumber(bool seen_period) { ...@@ -971,19 +986,19 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
return Token::NUMBER; return Token::NUMBER;
} }
uc32 Scanner::ScanIdentifierUnicodeEscape() { uc32 Scanner::ScanIdentifierUnicodeEscape() {
Advance(); Advance();
if (c0_ != 'u') return unibrow::Utf8::kBadChar; if (c0_ != 'u')
return unibrow::Utf8::kBadChar;
Advance(); Advance();
uc32 c = ScanHexEscape('u', 4); uc32 c = ScanHexEscape('u', 4);
// We do not allow a unicode escape sequence to start another // We do not allow a unicode escape sequence to start another
// unicode escape sequence. // unicode escape sequence.
if (c == '\\') return unibrow::Utf8::kBadChar; if (c == '\\')
return unibrow::Utf8::kBadChar;
return c; return c;
} }
Token::Value Scanner::ScanIdentifier() { Token::Value Scanner::ScanIdentifier() {
ASSERT(kIsIdentifierStart.get(c0_)); ASSERT(kIsIdentifierStart.get(c0_));
...@@ -994,7 +1009,8 @@ Token::Value Scanner::ScanIdentifier() { ...@@ -994,7 +1009,8 @@ Token::Value Scanner::ScanIdentifier() {
if (c0_ == '\\') { if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape(); uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier start characters. // Only allow legal identifier start characters.
if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; if (!kIsIdentifierStart.get(c))
return Token::ILLEGAL;
AddChar(c); AddChar(c);
keyword_match.Fail(); keyword_match.Fail();
} else { } else {
...@@ -1008,7 +1024,8 @@ Token::Value Scanner::ScanIdentifier() { ...@@ -1008,7 +1024,8 @@ Token::Value Scanner::ScanIdentifier() {
if (c0_ == '\\') { if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape(); uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier part characters. // Only allow legal identifier part characters.
if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; if (!kIsIdentifierPart.get(c))
return Token::ILLEGAL;
AddChar(c); AddChar(c);
keyword_match.Fail(); keyword_match.Fail();
} else { } else {
...@@ -1022,19 +1039,19 @@ Token::Value Scanner::ScanIdentifier() { ...@@ -1022,19 +1039,19 @@ Token::Value Scanner::ScanIdentifier() {
return keyword_match.token(); return keyword_match.token();
} }
bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
// Checks whether the buffer contains an identifier (no escape). // Checks whether the buffer contains an identifier (no escape).
if (!buffer->has_more()) return false; if (!buffer->has_more())
if (!kIsIdentifierStart.get(buffer->GetNext())) return false; return false;
if (!kIsIdentifierStart.get(buffer->GetNext()))
return false;
while (buffer->has_more()) { while (buffer->has_more()) {
if (!kIsIdentifierPart.get(buffer->GetNext())) return false; if (!kIsIdentifierPart.get(buffer->GetNext()))
return false;
} }
return true; return true;
} }
bool Scanner::ScanRegExpPattern(bool seen_equal) { bool Scanner::ScanRegExpPattern(bool seen_equal) {
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
bool in_character_class = false; bool in_character_class = false;
...@@ -1054,12 +1071,12 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) { ...@@ -1054,12 +1071,12 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
while (c0_ != '/' || in_character_class) { while (c0_ != '/' || in_character_class) {
if (kIsLineTerminator.get(c0_) || c0_ < 0) if (kIsLineTerminator.get(c0_) || c0_ < 0)
return false; return false;
if (c0_ == '\\') { // escaped character if (c0_ == '\\') { // escaped character
AddCharAdvance(); AddCharAdvance();
if (kIsLineTerminator.get(c0_) || c0_ < 0) if (kIsLineTerminator.get(c0_) || c0_ < 0)
return false; return false;
AddCharAdvance(); AddCharAdvance();
} else { // unescaped character } else { // unescaped character
if (c0_ == '[') if (c0_ == '[')
in_character_class = true; in_character_class = true;
if (c0_ == ']') if (c0_ == ']')
...@@ -1067,7 +1084,7 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) { ...@@ -1067,7 +1084,7 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
AddCharAdvance(); AddCharAdvance();
} }
} }
Advance(); // consume '/' Advance(); // consume '/'
TerminateLiteral(); TerminateLiteral();
...@@ -1080,7 +1097,7 @@ bool Scanner::ScanRegExpFlags() { ...@@ -1080,7 +1097,7 @@ bool Scanner::ScanRegExpFlags() {
while (kIsIdentifierPart.get(c0_)) { while (kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') { if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape(); uc32 c = ScanIdentifierUnicodeEscape();
if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { if (c != static_cast<uc32> (unibrow::Utf8::kBadChar)) {
// We allow any escaped character, unlike the restriction on // We allow any escaped character, unlike the restriction on
// IdentifierPart when it is used to build an IdentifierName. // IdentifierPart when it is used to build an IdentifierName.
AddChar(c); AddChar(c);
...@@ -1095,4 +1112,5 @@ bool Scanner::ScanRegExpFlags() { ...@@ -1095,4 +1112,5 @@ bool Scanner::ScanRegExpFlags() {
return true; return true;
} }
} } // namespace v8::internal }
} // namespace v8::internal
...@@ -41,6 +41,7 @@ class UTF8Buffer { ...@@ -41,6 +41,7 @@ class UTF8Buffer {
~UTF8Buffer(); ~UTF8Buffer();
void AddChar(uc32 c) { void AddChar(uc32 c) {
ASSERT_NOT_NULL(data_);
if (cursor_ <= limit_ && if (cursor_ <= limit_ &&
static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
*cursor_++ = static_cast<char>(c); *cursor_++ = static_cast<char>(c);
...@@ -49,16 +50,29 @@ class UTF8Buffer { ...@@ -49,16 +50,29 @@ class UTF8Buffer {
} }
} }
void Reset() {
  // The backing store is allocated on the first call only; later calls
  // keep the existing allocation and just rewind the write position.
  const bool needs_allocation = (data_ == NULL);
  if (needs_allocation) {
    data_ = NewArray<char>(kInitialCapacity);
    limit_ = ComputeLimit(data_, kInitialCapacity);
  }
  // Start writing again from the beginning of the buffer.
  cursor_ = data_;
}
int pos() const {
  // Only meaningful once the buffer has been allocated.
  ASSERT_NOT_NULL(data_);
  // Distance from the start of the buffer to the write cursor, in chars.
  const int chars_written = static_cast<int>(cursor_ - data_);
  return chars_written;
}
char* data() const {
  // Start of the backing store. The constructor initializes data_ to
  // NULL, so this can be NULL before the buffer has been allocated.
  return data_;
}
private: private:
static const int kInitialCapacity = 256;
char* data_; char* data_;
char* cursor_; char* cursor_;
char* limit_; char* limit_;
int Capacity() const { int Capacity() const {
ASSERT_NOT_NULL(data_);
return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize;
} }
...@@ -278,26 +292,30 @@ class Scanner { ...@@ -278,26 +292,30 @@ class Scanner {
// token returned by Next()). The string is 0-terminated and in
// UTF-8 format; it may contain 0-characters. Literal strings are
// collected for identifiers, strings, and numbers.
// These functions only give the correct result if the literal
// was scanned between calls to StartLiteral() and TerminateLiteral().
const char* literal_string() const { const char* literal_string() const {
return &literals_.data()[current_.literal_pos]; return current_.literal_buffer->data();
} }
int literal_length() const {
  // pos() counts the terminal '\0' added by TerminateLiteral(); the
  // reported length excludes it.
  const int length_with_terminator = current_.literal_buffer->pos();
  return length_with_terminator - 1;
}
// Returns the literal string for the next token (the token that
// would be returned if Next() were called).
const char* next_literal_string() const { const char* next_literal_string() const {
return &literals_.data()[next_.literal_pos]; return next_.literal_buffer->data();
} }
// Returns the length of the literal for the next token (the token that
// would be returned if Next() were called).
int next_literal_length() const { int next_literal_length() const {
return next_.literal_end - next_.literal_pos; return next_.literal_buffer->pos() - 1;
}
Vector<const char> next_literal() const {
  // Bundle the look-ahead token's literal start and length into a Vector.
  const char* start = next_literal_string();
  const int length = next_literal_length();
  return Vector<const char>(start, length);
}
// Scans the input as a regular expression pattern, previous // Scans the input as a regular expression pattern, previous
...@@ -339,7 +357,8 @@ class Scanner { ...@@ -339,7 +357,8 @@ class Scanner {
// Buffer to hold literal values (identifiers, strings, numbers) // Buffer to hold literal values (identifiers, strings, numbers)
// using 0-terminated UTF-8 encoding. // using 0-terminated UTF-8 encoding.
UTF8Buffer literals_; UTF8Buffer literal_buffer_1_;
UTF8Buffer literal_buffer_2_;
bool stack_overflow_; bool stack_overflow_;
static StaticResource<Utf8Decoder> utf8_decoder_; static StaticResource<Utf8Decoder> utf8_decoder_;
...@@ -351,7 +370,7 @@ class Scanner { ...@@ -351,7 +370,7 @@ class Scanner {
struct TokenDesc { struct TokenDesc {
Token::Value token; Token::Value token;
Location location; Location location;
int literal_pos, literal_end; UTF8Buffer* literal_buffer;
}; };
TokenDesc current_; // desc for current token (as returned by Next()) TokenDesc current_; // desc for current token (as returned by Next())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment