Commit 47c18709 authored by lrn@chromium.org

Move static scanner fields to scanner-base.h

Review URL: http://codereview.chromium.org/5026005

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5828 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 3b248841
......@@ -33,7 +33,7 @@
#include "conversions-inl.h"
#include "dtoa.h"
#include "factory.h"
#include "scanner.h"
#include "scanner-base.h"
#include "strtod.h"
namespace v8 {
......@@ -121,7 +121,7 @@ static const double JUNK_STRING_VALUE = OS::nan_value();
template <class Iterator, class EndMark>
static inline bool AdvanceToNonspace(Iterator* current, EndMark end) {
while (*current != end) {
if (!Scanner::kIsWhiteSpace.get(**current)) return true;
if (!ScannerConstants::kIsWhiteSpace.get(**current)) return true;
++*current;
}
return false;
......
......@@ -28,7 +28,8 @@
#ifndef V8_DATEPARSER_H_
#define V8_DATEPARSER_H_
#include "scanner.h"
#include "char-predicates-inl.h"
#include "scanner-base.h"
namespace v8 {
namespace internal {
......@@ -99,10 +100,20 @@ class DateParser : public AllStatic {
}
// The skip methods return whether they actually skipped something.
bool Skip(uint32_t c) { return ch_ == c ? (Next(), true) : false; }
// Calls Next() when the current character ch_ equals c.
// Returns true if something was skipped, false otherwise.
bool Skip(uint32_t c) {
  // Evaluate the comparison up front; Next() is only invoked on a match.
  const bool matched = (ch_ == c);
  if (matched) Next();
  return matched;
}
bool SkipWhiteSpace() {
return Scanner::kIsWhiteSpace.get(ch_) ? (Next(), true) : false;
if (ScannerConstants::kIsWhiteSpace.get(ch_)) {
Next();
return true;
}
return false;
}
bool SkipParentheses() {
......
......@@ -38,7 +38,7 @@
#include "mark-compact.h"
#include "natives.h"
#include "objects-visiting.h"
#include "scanner.h"
#include "scanner-base.h"
#include "scopeinfo.h"
#include "snapshot.h"
#include "v8threads.h"
......@@ -3249,7 +3249,8 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string,
const uc32 kMaxSupportedChar = 0xFFFF;
// Count the number of characters in the UTF-8 string and check if
// it is an ASCII string.
Access<Scanner::Utf8Decoder> decoder(Scanner::utf8_decoder());
Access<ScannerConstants::Utf8Decoder>
decoder(ScannerConstants::utf8_decoder());
decoder->Reset(string.start(), string.length());
int chars = 0;
bool is_ascii = true;
......
......@@ -35,7 +35,7 @@
#include "objects-inl.h"
#include "objects-visiting.h"
#include "macro-assembler.h"
#include "scanner.h"
#include "scanner-base.h"
#include "scopeinfo.h"
#include "string-stream.h"
#include "utils.h"
......@@ -1208,7 +1208,8 @@ MaybeObject* JSObject::AddFastProperty(String* name,
// Normalize the object if the name is an actual string (not the
// hidden symbols) and is not a real identifier.
StringInputBuffer buffer(name);
if (!Scanner::IsIdentifier(&buffer) && name != Heap::hidden_symbol()) {
if (!ScannerConstants::IsIdentifier(&buffer)
&& name != Heap::hidden_symbol()) {
Object* obj;
{ MaybeObject* maybe_obj =
NormalizeProperties(CLEAR_INOBJECT_PROPERTIES, 0);
......@@ -5088,7 +5089,8 @@ bool String::MarkAsUndetectable() {
bool String::IsEqualTo(Vector<const char> str) {
int slen = length();
Access<Scanner::Utf8Decoder> decoder(Scanner::utf8_decoder());
Access<ScannerConstants::Utf8Decoder>
decoder(ScannerConstants::utf8_decoder());
decoder->Reset(str.start(), str.length());
int i;
for (i = 0; i < slen && decoder->has_more(); i++) {
......
......@@ -36,6 +36,7 @@
#include "messages.h"
#include "parser.h"
#include "platform.h"
#include "prescanner.h"
#include "preparser.h"
#include "runtime.h"
#include "scopeinfo.h"
......@@ -4667,9 +4668,21 @@ ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) {
Handle<Script> no_script;
preparser::PreParser<Scanner, CompleteParserRecorder> parser;
Scanner scanner;
scanner.Initialize(source, stream, JAVASCRIPT);
int length = 0;
SafeStringInputBuffer safe_stream;
if (!source.is_null()) {
length = source->length();
safe_stream.Reset(source.location());
stream = &safe_stream;
} else {
length = stream->Length();
}
typedef preparser::Scanner<CharacterStreamUTF16Buffer, UTF8Buffer> PreScanner;
preparser::PreParser<PreScanner, CompleteParserRecorder> parser;
CharacterStreamUTF16Buffer buffer;
buffer.Initialize(source, stream, 0, length);
PreScanner scanner;
scanner.Initialize(&buffer);
bool allow_lazy = FLAG_lazy && (extension == NULL);
CompleteParserRecorder recorder;
if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
......
......@@ -33,6 +33,33 @@
namespace v8 {
namespace internal {
// ----------------------------------------------------------------------------
// Character predicates
unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart;
unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart;
unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace;
unibrow::Predicate<unibrow::LineTerminator, 128>
ScannerConstants::kIsLineTerminator;
StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_;
// Compound predicates.
// Tells whether the buffer contains an identifier (no escapes).
// Returns false for an empty buffer. Characters are read from the buffer
// via GetNext() until a non-matching character is found or the buffer is
// exhausted.
bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) {
  // An empty stream is never an identifier.
  if (!buffer->has_more()) return false;

  // The first character must satisfy the identifier-start predicate; every
  // following character must satisfy the identifier-part predicate.
  bool still_identifier = kIsIdentifierStart.get(buffer->GetNext());
  while (still_identifier && buffer->has_more()) {
    still_identifier = kIsIdentifierPart.get(buffer->GetNext());
  }
  return still_identifier;
}
// ----------------------------------------------------------------------------
// Keyword Matcher
......
......@@ -30,12 +30,37 @@
#ifndef V8_SCANNER_BASE_H_
#define V8_SCANNER_BASE_H_
#include "globals.h"
#include "checks.h"
#include "allocation.h"
#include "token.h"
#include "unicode.h"
#include "unicode-inl.h"
#include "char-predicates.h"
#include "utils.h"
namespace v8 {
namespace internal {
// Static scanner data: character-class predicates, a UTF-8 decoder
// resource, and the IsIdentifier helper.
class ScannerConstants : AllStatic {
 public:
  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

  // Character-class predicates.
  static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
  static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
  static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
  static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;

  // Returns a pointer to the static UTF-8 decoder resource.
  static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }

  // Tells whether the buffer contains an identifier (no escapes).
  static bool IsIdentifier(unibrow::CharacterStream* buffer);

 private:
  static StaticResource<Utf8Decoder> utf8_decoder_;
};
class KeywordMatcher {
// Incrementally recognize keywords.
//
......@@ -45,7 +70,8 @@ class KeywordMatcher {
// return switch this throw true try typeof var void while with
//
// *: Actually "future reserved keywords". These are the only ones we
// recognized, the remaining are allowed as identifiers.
// recognize, the remaining are allowed as identifiers.
// In ES5 strict mode, we should disallow all reserved keywords.
public:
KeywordMatcher()
: state_(INITIAL),
......@@ -156,10 +182,6 @@ class KeywordMatcher {
};
} } // namespace v8::internal
#endif // V8_SCANNER_BASE_H_
......@@ -30,23 +30,11 @@
#include "ast.h"
#include "handles.h"
#include "scanner.h"
#include "unicode-inl.h"
namespace v8 {
namespace internal {
// ----------------------------------------------------------------------------
// Character predicates
unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
// ----------------------------------------------------------------------------
// UTF8Buffer
......@@ -358,9 +346,9 @@ bool Scanner::SkipJavaScriptWhiteSpace() {
while (true) {
// We treat byte-order marks (BOMs) as whitespace for better
// compatibility with Spidermonkey and other JavaScript engines.
while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
// IsWhiteSpace() includes line terminators!
if (kIsLineTerminator.get(c0_)) {
if (ScannerConstants::kIsLineTerminator.get(c0_)) {
// Ignore line terminators, but remember them. This is necessary
// for automatic semicolon insertion.
has_line_terminator_before_next_ = true;
......@@ -400,7 +388,7 @@ Token::Value Scanner::SkipSingleLineComment() {
// separately by the lexical grammar and becomes part of the
// stream of input elements for the syntactic grammar (see
// ECMA-262, section 7.4, page 12).
while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
Advance();
}
......@@ -631,7 +619,7 @@ Token::Value Scanner::ScanJsonIdentifier(const char* text,
Advance();
text++;
}
if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
literal.Complete();
return token;
}
......@@ -854,7 +842,7 @@ void Scanner::ScanJavaScript() {
break;
default:
if (kIsIdentifierStart.get(c0_)) {
if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
token = ScanIdentifier();
} else if (IsDecimalDigit(c0_)) {
token = ScanNumber(false);
......@@ -937,7 +925,7 @@ void Scanner::ScanEscape() {
Advance();
// Skip escaped newlines.
if (kIsLineTerminator.get(c)) {
if (ScannerConstants::kIsLineTerminator.get(c)) {
// Allow CR+LF newlines in multiline string literals.
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
// Allow LF+CR newlines in multiline string literals.
......@@ -979,7 +967,8 @@ Token::Value Scanner::ScanString() {
Advance(); // consume quote
LiteralScope literal(this);
while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
while (c0_ != quote && c0_ >= 0
&& !ScannerConstants::kIsLineTerminator.get(c0_)) {
uc32 c = c0_;
Advance();
if (c == '\\') {
......@@ -1092,7 +1081,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
// not be an identifier start or a decimal digit; see ECMA-262
// section 7.8.3, page 17 (note that we read only one decimal digit
// if the value is 0).
if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))
return Token::ILLEGAL;
literal.Complete();
......@@ -1114,7 +1103,7 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
Token::Value Scanner::ScanIdentifier() {
ASSERT(kIsIdentifierStart.get(c0_));
ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
LiteralScope literal(this);
KeywordMatcher keyword_match;
......@@ -1123,7 +1112,7 @@ Token::Value Scanner::ScanIdentifier() {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier start characters.
if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
AddChar(c);
keyword_match.Fail();
} else {
......@@ -1133,11 +1122,11 @@ Token::Value Scanner::ScanIdentifier() {
}
// Scan the rest of the identifier characters.
while (kIsIdentifierPart.get(c0_)) {
while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier part characters.
if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
AddChar(c);
keyword_match.Fail();
} else {
......@@ -1153,17 +1142,6 @@ Token::Value Scanner::ScanIdentifier() {
bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
// Checks whether the buffer contains an identifier (no escape).
if (!buffer->has_more()) return false;
if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
while (buffer->has_more()) {
if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
}
return true;
}
bool Scanner::ScanRegExpPattern(bool seen_equal) {
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
bool in_character_class = false;
......@@ -1181,10 +1159,10 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
AddChar('=');
while (c0_ != '/' || in_character_class) {
if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
if (c0_ == '\\') { // escaped character
AddCharAdvance();
if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
AddCharAdvance();
} else { // unescaped character
if (c0_ == '[') in_character_class = true;
......@@ -1202,7 +1180,7 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
bool Scanner::ScanRegExpFlags() {
// Scan regular expression flags.
LiteralScope literal(this);
while (kIsIdentifierPart.get(c0_)) {
while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
......
......@@ -251,17 +251,10 @@ class Scanner {
bool stack_overflow() { return stack_overflow_; }
static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }
// Tells whether the buffer contains an identifier (no escapes).
// Used for checking if a property name is an identifier.
static bool IsIdentifier(unibrow::CharacterStream* buffer);
static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
static const int kCharacterLookaheadBufferSize = 1;
static const int kNoEndPosition = 1;
......@@ -391,7 +384,6 @@ class Scanner {
UTF8Buffer literal_buffer_;
bool stack_overflow_;
static StaticResource<Utf8Decoder> utf8_decoder_;
// One Unicode character look-ahead; c0_ < 0 at the end of the input.
uc32 c0_;
......
......@@ -44,6 +44,7 @@ bool V8::has_been_setup_ = false;
bool V8::has_been_disposed_ = false;
bool V8::has_fatal_error_ = false;
bool V8::Initialize(Deserializer* des) {
bool create_heap_objects = des == NULL;
if (has_been_disposed_ || has_fatal_error_) return false;
......
......@@ -38,6 +38,7 @@
#include "utils.h"
#include "cctest.h"
#include "parser.h"
#include "unicode-inl.h"
static const bool kLogThreading = true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment