Commit 2d4aa629 authored by lrn@chromium.org

Extract scanner base/JS/JSON and move base and JS to scanner-base.

Remove templates from prescanner.

Review URL: http://codereview.chromium.org/5136002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5854 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 7c5cca29
......@@ -39,16 +39,6 @@
namespace v8 {
namespace internal {
// Maps a hexadecimal digit character to its numeric value (0 .. 15).
// Any character that is not a hexadecimal digit yields -1.
int HexValue(uc32 c) {
  const bool is_decimal_digit = c >= '0' && c <= '9';
  const bool is_lower_hex_letter = c >= 'a' && c <= 'f';
  const bool is_upper_hex_letter = c >= 'A' && c <= 'F';

  if (is_decimal_digit) return c - '0';
  // The letters 'a'..'f' and 'A'..'F' stand for the values 10..15.
  if (is_lower_hex_letter) return 10 + (c - 'a');
  if (is_upper_hex_letter) return 10 + (c - 'A');
  return -1;
}
namespace {
// C++-style iterator adaptor for StringInputBuffer
......
......@@ -75,11 +75,6 @@ static inline uint32_t DoubleToUint32(double x) {
}
// Returns the value (0 .. 15) of a hexadecimal character c.
// If c is not a legal hexadecimal character, returns a value < 0.
int HexValue(uc32 c);
// Enumeration for allowing octals and ignoring junk when converting
// strings to numbers.
enum ConversionFlags {
......
......@@ -728,7 +728,7 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source,
// Initialize parser state.
source->TryFlatten();
scanner_.Initialize(source, JAVASCRIPT);
scanner_.Initialize(source);
ASSERT(target_stack_ == NULL);
if (pre_data_ != NULL) pre_data_->Initialize();
......@@ -791,8 +791,7 @@ FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info) {
// Initialize parser state.
source->TryFlatten();
scanner_.Initialize(source, info->start_position(), info->end_position(),
JAVASCRIPT);
scanner_.Initialize(source, info->start_position(), info->end_position());
ASSERT(target_stack_ == NULL);
mode_ = PARSE_EAGERLY;
......@@ -3613,7 +3612,7 @@ Expression* Parser::NewThrowError(Handle<String> constructor,
Handle<Object> JsonParser::ParseJson(Handle<String> source) {
source->TryFlatten();
scanner_.Initialize(source, JSON);
scanner_.Initialize(source);
Handle<Object> result = ParseJsonValue();
if (result.is_null() || scanner_.Next() != Token::EOS) {
if (scanner_.stack_overflow()) {
......@@ -4641,10 +4640,9 @@ int ScriptDataImpl::ReadNumber(byte** source) {
static ScriptDataImpl* DoPreParse(UTF16Buffer* stream,
bool allow_lazy,
PartialParserRecorder* recorder) {
typedef preparser::Scanner<UTF16Buffer, UTF8Buffer> PreScanner;
PreScanner scanner;
preparser::Scanner scanner;
scanner.Initialize(stream);
preparser::PreParser<PreScanner, PartialParserRecorder> preparser;
preparser::PreParser<preparser::Scanner, PartialParserRecorder> preparser;
if (!preparser.PreParseProgram(&scanner, recorder, allow_lazy)) {
Top::StackOverflow();
return NULL;
......
......@@ -682,7 +682,7 @@ class Parser {
Expression* ParseV8Intrinsic(bool* ok);
INLINE(Token::Value peek()) { return scanner_.peek(); }
INLINE(Token::Value Next()) { return scanner_.Next(); }
INLINE(Token::Value Next()) { return scanner_.NextCheckStack(); }
INLINE(void Consume(Token::Value token));
void Expect(Token::Value token, bool* ok);
bool Check(Token::Value token);
......@@ -760,7 +760,7 @@ class Parser {
ZoneList<Handle<String> > symbol_cache_;
Handle<Script> script_;
Scanner scanner_;
V8JavaScriptScanner scanner_;
Scope* top_scope_;
int with_nesting_level_;
......@@ -852,7 +852,7 @@ class JsonParser BASE_EMBEDDED {
// Converts the currently parsed literal to a JavaScript String.
Handle<String> GetString();
Scanner scanner_;
JsonScanner scanner_;
};
} } // namespace v8::internal
......
This diff is collapsed.
This diff is collapsed.
......@@ -37,11 +37,24 @@
#include "unicode-inl.h"
#include "char-predicates.h"
#include "utils.h"
#include "list-inl.h"
namespace v8 {
namespace internal {
// Interface through which the scanner reads characters from the input source.
// Returns the value (0 .. 15) of a hexadecimal character c.
// If c is not a legal hexadecimal character, returns a value < 0.
inline int HexValue(uc32 c) {
  // Decimal digits: rebased on '0' they are exactly 0..9. Comparing as
  // unsigned also rejects every character below '0', which is negative
  // after the subtraction.
  c -= '0';
  if (static_cast<unsigned>(c) <= 9) return c;
  // Letters: rebased, 'A'..'F' are 0x11..0x16 and 'a'..'f' are 0x31..0x36.
  // OR-ing in 0x20 folds the upper-case range onto the lower-case one, and
  // subtracting ('a' - '0') maps the six hex letters onto 0..5.
  c = (c | 0x20) - ('a' - '0');
  // The bound is 5, not 6: a bound of 6 would also accept 'g' and 'G' and
  // return 16 for them.
  if (static_cast<unsigned>(c) <= 5) return c + 10;
  return -1;
}
// ----------------------------------------------------------------------------
// UTF16Buffer - scanner input source with pushback.
class UTF16Buffer {
public:
UTF16Buffer();
......@@ -54,7 +67,11 @@ class UTF16Buffer {
int pos() const { return pos_; }
static const int kNoEndPosition = 1;
protected:
// Initial value of end_ before the input stream is initialized.
int pos_; // Current position in the buffer.
int end_; // Position where scanning should stop (EOF).
};
......@@ -79,6 +96,292 @@ class ScannerConstants : AllStatic {
static StaticResource<Utf8Decoder> utf8_decoder_;
};
// ----------------------------------------------------------------------------
// LiteralCollector - accumulates the characters of the literal that is
// currently being scanned and hands the finished literal back as a vector
// of chars.
class LiteralCollector {
 public:
  LiteralCollector();
  ~LiteralCollector();

  // Appends c to the literal in progress. Does nothing unless a literal has
  // been started. Characters up to unibrow::Utf8::kMaxOneByteChar are stored
  // as a single char; everything else is handed to AddCharSlow().
  inline void AddChar(uc32 c) {
    if (!recording_) return;
    const bool fits_in_one_char =
        static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar;
    if (fits_in_one_char) {
      buffer_.Add(static_cast<char>(c));
      return;
    }
    AddCharSlow(c);
  }

  // Opens a new sequence in the buffer and starts recording characters.
  void StartLiteral() {
    buffer_.StartSequence();
    recording_ = true;
  }

  // Stops recording, appends kEndMarker and returns the collected sequence,
  // end marker included. Returns an empty vector if nothing was being
  // recorded.
  Vector<const char> EndLiteral() {
    if (!recording_) return Vector<const char>();
    recording_ = false;
    buffer_.Add(kEndMarker);
    Vector<char> chars = buffer_.EndSequence();
    return Vector<const char>(chars.start(), chars.length());
  }

  // Stops recording and discards the sequence collected so far. Does nothing
  // if no literal is being recorded.
  void DropLiteral() {
    if (!recording_) return;
    recording_ = false;
    buffer_.DropSequence();
  }

  // Resets the underlying buffer.
  void Reset() {
    buffer_.Reset();
  }

  // Marker appended after every collected literal. Because it is zero,
  // strlen and similar functions work on identifiers and numbers. They do
  // not work on strings, which may themselves contain zero bytes.
  static const char kEndMarker = '\x00';

 private:
  static const int kInitialCapacity = 256;

  SequenceCollector<char, 4> buffer_;
  bool recording_;  // True between StartLiteral() and End/DropLiteral().

  // Out-of-line path of AddChar() for characters above kMaxOneByteChar.
  // NOTE(review): presumably emits the multi-byte UTF-8 encoding; confirm
  // against the definition.
  void AddCharSlow(uc32 c);
};
// ----------------------------------------------------------------------------
// Scanner - base class with the functionality shared by the JSON and the
// JavaScript scanners: the current token, one token of look-ahead, one
// character of look-ahead (c0_) and the buffer that collects literals.
class Scanner {
 public:
  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

  // Begin/end position pair recorded for a token.
  struct Location {
    Location() : beg_pos(0), end_pos(0) { }
    Location(int b, int e) : beg_pos(b), end_pos(e) { }

    int beg_pos;
    int end_pos;
  };

  // Helper object tied to the scanning of one literal. Its members are
  // defined out of line.
  class LiteralScope {
   public:
    explicit LiteralScope(Scanner* self);
    ~LiteralScope();
    void Complete();

   private:
    Scanner* scanner_;
    bool complete_;
  };

  Scanner();

  // The current token, i.e. the one most recently returned by Next().
  Token::Value current_token() { return current_.token; }

  // The look-ahead token, i.e. the one following the current token.
  Token::Value peek() const { return next_.token; }

  // Location of the current token (the token returned by Next()).
  Location location() const { return current_.location; }

  // Location of the look-ahead token.
  Location peek_location() const { return next_.location; }

  // Accessors for the literal of the current token (the token returned by
  // Next()), if it has one. Literals are collected for identifiers, strings
  // and numbers. The characters are UTF-8 encoded and followed by a 0
  // terminator; a string literal may also contain 0-characters itself.
  // The results are only meaningful if the literal was scanned between
  // calls to StartLiteral() and TerminateLiteral().
  const char* literal_string() const {
    return current_.literal_chars.start();
  }

  int literal_length() const {
    // The terminal '\x00' added by TerminateLiteral() is not counted.
    return current_.literal_chars.length() - 1;
  }

  Vector<const char> literal() const {
    const char* chars = literal_string();
    int length = literal_length();
    return Vector<const char>(chars, length);
  }

  // The same accessors for the look-ahead token (the token that Next()
  // would return if it were called).
  const char* next_literal_string() const {
    return next_.literal_chars.start();
  }

  int next_literal_length() const {
    // The terminal '\x00' added by TerminateLiteral() is not counted.
    return next_.literal_chars.length() - 1;
  }

  Vector<const char> next_literal() const {
    const char* chars = next_literal_string();
    int length = next_literal_length();
    return Vector<const char>(chars, length);
  }

  bool stack_overflow() { return stack_overflow_; }

  static const int kCharacterLookaheadBufferSize = 1;

 protected:
  // Description of a token: its kind, its location and its literal chars.
  // Used for both the current and the look-ahead token.
  struct TokenDesc {
    Token::Value token;
    Location location;
    Vector<const char> literal_chars;
  };

  // Must be called once source_ has been pointed at the input.
  void Init() {
    // Read the first character into c0_ (one character of look-ahead).
    ASSERT(kCharacterLookaheadBufferSize == 1);
    Advance();
    // The current token does not refer to any literal yet.
    current_.literal_chars = Vector<const char>();
    // Start from an empty literal buffer.
    literal_buffer_.Reset();
  }

  // Literal buffer support.

  // Begins collecting the characters of a literal.
  inline void StartLiteral() {
    literal_buffer_.StartLiteral();
  }

  // Adds c to the literal being collected.
  inline void AddLiteralChar(uc32 c) {
    literal_buffer_.AddChar(c);
  }

  // Finishes the literal and attaches it to the look-ahead token.
  inline void TerminateLiteral() {
    next_.literal_chars = literal_buffer_.EndLiteral();
  }

  // Abandons the literal being collected and drops its characters,
  // e.g. after an error was encountered.
  inline void DropLiteral() {
    literal_buffer_.DropLiteral();
  }

  // Adds the current character to the literal, then moves to the next one.
  inline void AddLiteralCharAdvance() {
    AddLiteralChar(c0_);
    Advance();
  }

  // Low-level scanning support.

  // Reads the next character of the input into c0_.
  void Advance() { c0_ = source_->Advance(); }

  // Returns ch to the input and makes it the current character again.
  void PushBack(uc32 ch) {
    source_->PushBack(ch);
    c0_ = ch;
  }

  // Consumes one character and yields tok.
  inline Token::Value Select(Token::Value tok) {
    Advance();
    return tok;
  }

  // Consumes one character. If the character after it equals next, that one
  // is consumed as well and then is yielded; otherwise else_ is yielded.
  inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
    Advance();
    if (c0_ != next) return else_;
    Advance();
    return then;
  }

  uc32 ScanHexEscape(uc32 c, int length);
  uc32 ScanOctalEscape(uc32 c, int length);

  // The current source position: the position of the input buffer minus the
  // character look-ahead.
  int source_pos() {
    return source_->pos() - kCharacterLookaheadBufferSize;
  }

  TokenDesc current_;  // Descriptor of the current token (returned by Next()).
  TokenDesc next_;     // Descriptor of the look-ahead token.

  // The input stream. Has to be set to a UTF16Buffer before scanning.
  UTF16Buffer* source_;

  // Holds literal values (identifiers, strings, numbers) in
  // '\x00'-terminated UTF-8 encoding. Manages its own allocation.
  LiteralCollector literal_buffer_;

  bool stack_overflow_;

  // One character of Unicode look-ahead; c0_ < 0 once the input has ended.
  uc32 c0_;
};
// ----------------------------------------------------------------------------
// JavaScriptScanner - the JavaScript-specific scanning logic, layered on top
// of the generic Scanner base class.
class JavaScriptScanner : public Scanner {
 public:
  JavaScriptScanner();

  // Advances the scanner and returns the next token.
  Token::Value Next();

  // Whether a line terminator was seen before the peek'ed token.
  bool has_line_terminator_before_next() const {
    return has_line_terminator_before_next_;
  }

  // Scans the input as a regular expression pattern. The preceding
  // character(s) must be '/' or, if seen_equal is set, '/='.
  // Returns true if a pattern is scanned.
  bool ScanRegExpPattern(bool seen_equal);

  // Scans regular expression flags. Always returns true, because the set of
  // flags is allowed to be empty.
  bool ScanRegExpFlags();

  // Whether the buffer holds an identifier that contains no escapes.
  // Used to check whether a property name is an identifier.
  static bool IsIdentifier(unibrow::CharacterStream* buffer);

  // Seeks forward to the given position. This does not work in general,
  // for instance when characters have been pushed back, but it does work
  // for seeking forward to simple delimiter tokens, which is its only use.
  void SeekForward(int pos);

 protected:
  bool SkipWhiteSpace();
  Token::Value SkipSingleLineComment();
  Token::Value SkipMultiLineComment();

  // Scans one JavaScript token.
  void Scan();

  void ScanDecimalDigits();
  Token::Value ScanNumber(bool seen_period);
  Token::Value ScanIdentifier();
  void ScanEscape();
  Token::Value ScanString();

  // Scans what may be an HTML comment, i.e. input beginning with '<!'.
  Token::Value ScanHtmlComment();

  // Decodes a unicode escape sequence occurring inside an identifier.
  // Yields kBadChar if the escape sequence cannot be decoded.
  uc32 ScanIdentifierUnicodeEscape();

  bool has_line_terminator_before_next_;
};
// ----------------------------------------------------------------------------
// Keyword matching state machine.
class KeywordMatcher {
// Incrementally recognize keywords.
......
This diff is collapsed.
This diff is collapsed.
......@@ -36,7 +36,6 @@
#include "parser.h"
#include "utils.h"
#include "execution.h"
#include "scanner.h"
#include "preparser.h"
#include "cctest.h"
......@@ -262,9 +261,10 @@ TEST(StandAlonePreParser) {
const char* program = programs[i];
unibrow::Utf8InputBuffer<256> stream(program, strlen(program));
i::CompleteParserRecorder log;
i::Scanner scanner;
scanner.Initialize(i::Handle<i::String>::null(), &stream, i::JAVASCRIPT);
v8::preparser::PreParser<i::Scanner, i::CompleteParserRecorder> preparser;
i::V8JavaScriptScanner scanner;
scanner.Initialize(i::Handle<i::String>::null(), &stream);
v8::preparser::PreParser<i::V8JavaScriptScanner,
i::CompleteParserRecorder> preparser;
bool result = preparser.PreParseProgram(&scanner, &log, true);
CHECK(result);
i::ScriptDataImpl data(log.ExtractData());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment