Commit c9928c05 authored by lrn@chromium.org

Change scanner buffers to not use utf-8.

Make preparser keep its symbol text itself instead of relying on the scanner.

Review URL: http://codereview.chromium.org/6075005

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6115 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 4e32d5ad
......@@ -99,6 +99,14 @@ Handle<String> Factory::LookupSymbol(Vector<const char> string) {
CALL_HEAP_FUNCTION(Heap::LookupSymbol(string), String);
}
// Handle-returning wrapper around Heap::LookupAsciiSymbol for a vector of
// ASCII characters. The heap call is issued through CALL_HEAP_FUNCTION
// (macro defined elsewhere; presumably it handles allocation failure and
// wraps the result in a Handle<String> -- confirm against its definition).
Handle<String> Factory::LookupAsciiSymbol(Vector<const char> string) {
CALL_HEAP_FUNCTION(Heap::LookupAsciiSymbol(string), String);
}
// Handle-returning wrapper around Heap::LookupTwoByteSymbol for a vector of
// uc16 characters. The heap call is issued through CALL_HEAP_FUNCTION
// (macro defined elsewhere; presumably it handles allocation failure and
// wraps the result in a Handle<String> -- confirm against its definition).
Handle<String> Factory::LookupTwoByteSymbol(Vector<const uc16> string) {
CALL_HEAP_FUNCTION(Heap::LookupTwoByteSymbol(string), String);
}
Handle<String> Factory::NewStringFromAscii(Vector<const char> string,
PretenureFlag pretenure) {
......
......@@ -61,6 +61,8 @@ class Factory : public AllStatic {
PretenureFlag pretenure);
static Handle<String> LookupSymbol(Vector<const char> str);
// Looks up a symbol from a vector of ASCII characters.
static Handle<String> LookupAsciiSymbol(Vector<const char> str);
// Looks up a symbol from a vector of two-byte (uc16) characters.
static Handle<String> LookupTwoByteSymbol(Vector<const uc16> str);
// Convenience overload taking a C string. Note that it wraps the string
// with CStrVector and forwards to LookupSymbol, not to the Vector-based
// LookupAsciiSymbol overload declared above.
static Handle<String> LookupAsciiSymbol(const char* str) {
return LookupSymbol(CStrVector(str));
}
......
......@@ -181,10 +181,6 @@ typedef byte* Address;
#define USING_BSD_ABI
#endif
// Code-point values in Unicode 4.0 are 21 bits wide.
typedef uint16_t uc16;
typedef int32_t uc32;
// -----------------------------------------------------------------------------
// Constants
......@@ -228,6 +224,15 @@ const int kBinary32MinExponent = 0x01;
const int kBinary32MantissaBits = 23;
const int kBinary32ExponentShift = 23;
// ASCII/UC16 constants
// Code-point values in Unicode 4.0 are 21 bits wide.
// uc16 is a 16-bit unsigned unit; uc32 is a signed 32-bit type, which is
// wide enough for a 21-bit code point.
typedef uint16_t uc16;
typedef int32_t uc32;
// Per-character storage sizes for the two string representations.
// kASCIISize mirrors kCharSize (defined elsewhere); kUC16Size is
// sizeof(uc16).
const int kASCIISize = kCharSize;
const int kUC16Size = sizeof(uc16); // NOLINT
// Largest ASCII character code (0x7f), as a signed uc32 and as an unsigned
// 32-bit value.
const uc32 kMaxAsciiCharCode = 0x7f;
const uint32_t kMaxAsciiCharCodeU = 0x7fu;
// The expression OFFSET_OF(type, field) computes the byte-offset
// of the specified field relative to the containing type. This
......
......@@ -62,6 +62,71 @@ MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
}
// Allocates a sequential ASCII symbol whose characters are copied from |str|
// and whose hash field is set to |hash_field|.
//
// Returns Failure::OutOfMemoryException() when |str| is longer than
// SeqAsciiString::kMaxLength, or the allocator's failure if raw allocation
// does not succeed; otherwise returns the new string.
MaybeObject* Heap::AllocateAsciiSymbol(Vector<const char> str,
                                       uint32_t hash_field) {
  const int char_count = str.length();
  if (char_count > SeqAsciiString::kMaxLength) {
    return Failure::OutOfMemoryException();
  }

  // Pick the map and work out how many bytes the object needs.
  Map* target_map = ascii_symbol_map();
  int object_size = SeqAsciiString::SizeFor(char_count);

  // Objects too large for paged space go to lo_space_; everything else is
  // allocated in old_data_space_.
  MaybeObject* maybe_raw;
  if (object_size > MaxObjectSizeInPagedSpace()) {
    maybe_raw = lo_space_->AllocateRaw(object_size);
  } else {
    maybe_raw = old_data_space_->AllocateRaw(object_size);
  }
  Object* raw;
  if (!maybe_raw->ToObject(&raw)) return maybe_raw;

  // The map has to be installed before the object is treated as a String.
  reinterpret_cast<HeapObject*>(raw)->set_map(target_map);

  // Initialize the header: length and hash field.
  String* symbol = String::cast(raw);
  symbol->set_length(char_count);
  symbol->set_hash_field(hash_field);
  ASSERT_EQ(object_size, symbol->Size());

  // Copy the character payload right behind the header.
  memcpy(symbol->address() + SeqAsciiString::kHeaderSize,
         str.start(),
         char_count);
  return symbol;
}
// Allocates a sequential two-byte symbol whose characters are copied from
// |str| and whose hash field is set to |hash_field|.
//
// Returns Failure::OutOfMemoryException() when |str| is longer than
// SeqTwoByteString::kMaxLength, or the allocator's failure if raw allocation
// does not succeed; otherwise returns the new string.
MaybeObject* Heap::AllocateTwoByteSymbol(Vector<const uc16> str,
                                         uint32_t hash_field) {
  const int char_count = str.length();
  if (char_count > SeqTwoByteString::kMaxLength) {
    return Failure::OutOfMemoryException();
  }

  // Pick the map and work out how many bytes the object needs.
  Map* target_map = symbol_map();
  int object_size = SeqTwoByteString::SizeFor(char_count);

  // Objects too large for paged space go to lo_space_; everything else is
  // allocated in old_data_space_.
  MaybeObject* maybe_raw;
  if (object_size > MaxObjectSizeInPagedSpace()) {
    maybe_raw = lo_space_->AllocateRaw(object_size);
  } else {
    maybe_raw = old_data_space_->AllocateRaw(object_size);
  }
  Object* raw;
  if (!maybe_raw->ToObject(&raw)) return maybe_raw;

  // The map has to be installed before the object is treated as a String.
  reinterpret_cast<HeapObject*>(raw)->set_map(target_map);

  // Initialize the header: length and hash field.
  String* symbol = String::cast(raw);
  symbol->set_length(char_count);
  symbol->set_hash_field(hash_field);
  ASSERT_EQ(object_size, symbol->Size());

  // Copy the character payload right behind the header; each character
  // occupies kUC16Size bytes.
  memcpy(symbol->address() + SeqTwoByteString::kHeaderSize,
         str.start(),
         char_count * kUC16Size);
  return symbol;
}
// Delegates to CopyFixedArrayWithMap, passing |src| together with the map
// that |src| currently has.
MaybeObject* Heap::CopyFixedArray(FixedArray* src) {
return CopyFixedArrayWithMap(src, src->map());
}
......
......@@ -4013,6 +4013,36 @@ MaybeObject* Heap::LookupSymbol(Vector<const char> string) {
}
// Looks up the symbol for the ASCII character vector |string| in the symbol
// table. The table returned by the lookup is stored back into the roots
// array. Returns the symbol, or the failure reported by the table lookup.
MaybeObject* Heap::LookupAsciiSymbol(Vector<const char> string) {
  Object* found = NULL;
  MaybeObject* maybe_table =
      symbol_table()->LookupAsciiSymbol(string, &found);
  Object* table;
  if (!maybe_table->ToObject(&table)) return maybe_table;

  // set_symbol_table cannot be used here: SymbolTable::cast knows that
  // SymbolTable is a singleton and checks for identity.
  roots_[kSymbolTableRootIndex] = table;

  ASSERT(found != NULL);
  return found;
}
// Looks up the symbol for the uc16 character vector |string| in the symbol
// table. The table returned by the lookup is stored back into the roots
// array. Returns the symbol, or the failure reported by the table lookup.
MaybeObject* Heap::LookupTwoByteSymbol(Vector<const uc16> string) {
  Object* found = NULL;
  MaybeObject* maybe_table =
      symbol_table()->LookupTwoByteSymbol(string, &found);
  Object* table;
  if (!maybe_table->ToObject(&table)) return maybe_table;

  // set_symbol_table cannot be used here: SymbolTable::cast knows that
  // SymbolTable is a singleton and checks for identity.
  roots_[kSymbolTableRootIndex] = table;

  ASSERT(found != NULL);
  return found;
}
MaybeObject* Heap::LookupSymbol(String* string) {
if (string->IsSymbol()) return string;
Object* symbol = NULL;
......
......@@ -431,6 +431,14 @@ class Heap : public AllStatic {
int chars,
uint32_t hash_field);
// Allocates a symbol from a vector of ASCII characters and stores
// |hash_field| in it. Yields an out-of-memory failure when the vector is
// longer than SeqAsciiString::kMaxLength.
MUST_USE_RESULT static inline MaybeObject* AllocateAsciiSymbol(
Vector<const char> str,
uint32_t hash_field);
// Allocates a symbol from a vector of two-byte (uc16) characters and stores
// |hash_field| in it. Yields an out-of-memory failure when the vector is
// longer than SeqTwoByteString::kMaxLength.
MUST_USE_RESULT static inline MaybeObject* AllocateTwoByteSymbol(
Vector<const uc16> str,
uint32_t hash_field);
MUST_USE_RESULT static MaybeObject* AllocateInternalSymbol(
unibrow::CharacterStream* buffer, int chars, uint32_t hash_field);
......@@ -686,6 +694,9 @@ class Heap : public AllStatic {
// failed.
// Please note this function does not perform a garbage collection.
MUST_USE_RESULT static MaybeObject* LookupSymbol(Vector<const char> str);
// Looks up the symbol for a vector of ASCII characters.
MUST_USE_RESULT static MaybeObject* LookupAsciiSymbol(Vector<const char> str);
// Looks up the symbol for a vector of two-byte (uc16) characters.
MUST_USE_RESULT static MaybeObject* LookupTwoByteSymbol(
Vector<const uc16> str);
// Convenience overload taking a C string. Note that it wraps the string
// with CStrVector and forwards to LookupSymbol, not to the Vector-based
// LookupAsciiSymbol overload declared above.
MUST_USE_RESULT static MaybeObject* LookupAsciiSymbol(const char* str) {
return LookupSymbol(CStrVector(str));
}
......
......@@ -5143,6 +5143,26 @@ bool String::IsEqualTo(Vector<const char> str) {
}
// Returns true when this string has exactly as many characters as |str| and
// every character equals the corresponding char of |str| after that char is
// converted to uint16_t.
bool String::IsAsciiEqualTo(Vector<const char> str) {
  const int char_count = length();
  if (char_count != str.length()) return false;
  int pos = 0;
  while (pos < char_count) {
    if (Get(pos) != static_cast<uint16_t>(str[pos])) return false;
    pos++;
  }
  return true;
}
// Returns true when this string has exactly as many characters as |str| and
// every character equals the corresponding uc16 unit of |str|.
bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
  const int char_count = length();
  if (char_count != str.length()) return false;
  int pos = 0;
  while (pos < char_count) {
    if (Get(pos) != str[pos]) return false;
    pos++;
  }
  return true;
}
template <typename schar>
static inline uint32_t HashSequentialString(const schar* chars, int length) {
StringHasher hasher(length);
......@@ -8086,6 +8106,85 @@ class Utf8SymbolKey : public HashTableKey {
};
template <typename Char>
class SequentialSymbolKey : public HashTableKey {
public:
explicit SequentialSymbolKey(Vector<const Char> string)
: string_(string), hash_field_(0) { }
uint32_t Hash() {
StringHasher hasher(string_.length());
// Very long strings have a trivial hash that doesn't inspect the
// string contents.
if (hasher.has_trivial_hash()) {
hash_field_ = hasher.GetHashField();
} else {
int i = 0;
// Do the iterative array index computation as long as there is a
// chance this is an array index.
while (i < string_.length() && hasher.is_array_index()) {
hasher.AddCharacter(static_cast<uc32>(string_[i]));
i++;
}
// Process the remaining characters without updating the array
// index.
while (i < string_.length()) {
hasher.AddCharacterNoIndex(static_cast<uc32>(string_[i]));
i++;
}
hash_field_ = hasher.GetHashField();
}
uint32_t result = hash_field_ >> String::kHashShift;
ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
return result;
}
uint32_t HashForObject(Object* other) {
return String::cast(other)->Hash();
}
Vector<const Char> string_;
uint32_t hash_field_;
};
// Symbol table key for a vector of ASCII characters.
class AsciiSymbolKey : public SequentialSymbolKey<char> {
 public:
  explicit AsciiSymbolKey(Vector<const char> str)
      : SequentialSymbolKey<char>(str) { }

  // A table entry matches when its characters equal the key's characters.
  bool IsMatch(Object* string) {
    String* candidate = String::cast(string);
    return candidate->IsAsciiEqualTo(string_);
  }

  // Allocates the symbol for this key. hash_field_ starts out as 0, so the
  // hash is computed here if Hash() has not stored a value yet.
  MaybeObject* AsObject() {
    if (hash_field_ == 0) {
      Hash();
    }
    return Heap::AllocateAsciiSymbol(string_, hash_field_);
  }
};
// Symbol table key for a vector of two-byte (uc16) characters.
class TwoByteSymbolKey : public SequentialSymbolKey<uc16> {
 public:
  explicit TwoByteSymbolKey(Vector<const uc16> str)
      : SequentialSymbolKey<uc16>(str) { }

  // A table entry matches when its characters equal the key's characters.
  bool IsMatch(Object* string) {
    String* candidate = String::cast(string);
    return candidate->IsTwoByteEqualTo(string_);
  }

  // Allocates the symbol for this key. hash_field_ starts out as 0, so the
  // hash is computed here if Hash() has not stored a value yet.
  MaybeObject* AsObject() {
    if (hash_field_ == 0) {
      Hash();
    }
    return Heap::AllocateTwoByteSymbol(string_, hash_field_);
  }
};
// SymbolKey carries a string/symbol object as key.
class SymbolKey : public HashTableKey {
public:
......@@ -8830,6 +8929,19 @@ MaybeObject* SymbolTable::LookupSymbol(Vector<const char> str, Object** s) {
}
// Looks up |str| (ASCII characters) by wrapping it in an AsciiSymbolKey and
// handing that key to LookupKey; |s| is passed through to LookupKey
// unchanged.
MaybeObject* SymbolTable::LookupAsciiSymbol(Vector<const char> str,
Object** s) {
AsciiSymbolKey key(str);
return LookupKey(&key, s);
}
// Looks up |str| (uc16 characters) by wrapping it in a TwoByteSymbolKey and
// handing that key to LookupKey; |s| is passed through to LookupKey
// unchanged.
MaybeObject* SymbolTable::LookupTwoByteSymbol(Vector<const uc16> str,
Object** s) {
TwoByteSymbolKey key(str);
return LookupKey(&key, s);
}
MaybeObject* SymbolTable::LookupKey(HashTableKey* key, Object** s) {
int entry = FindEntry(key);
......
......@@ -2327,6 +2327,10 @@ class SymbolTable: public HashTable<SymbolTableShape, HashTableKey*> {
// been enlarged. If the return value is not a failure, the symbol
// pointer *s is set to the symbol found.
MUST_USE_RESULT MaybeObject* LookupSymbol(Vector<const char> str, Object** s);
MUST_USE_RESULT MaybeObject* LookupAsciiSymbol(Vector<const char> str,
Object** s);
MUST_USE_RESULT MaybeObject* LookupTwoByteSymbol(Vector<const uc16> str,
Object** s);
MUST_USE_RESULT MaybeObject* LookupString(String* key, Object** s);
// Looks up a symbol that is equal to the given string and returns
......@@ -5074,6 +5078,8 @@ class String: public HeapObject {
// String equality operations.
inline bool Equals(String* other);
bool IsEqualTo(Vector<const char> str);
// Length check followed by a character-by-character comparison against a
// vector of ASCII characters.
bool IsAsciiEqualTo(Vector<const char> str);
// Length check followed by a character-by-character comparison against a
// vector of two-byte (uc16) characters.
bool IsTwoByteEqualTo(Vector<const uc16> str);
// Return a UTF8 representation of the string. The string is null
// terminated but may optionally contain nulls. Length is returned
......
......@@ -323,22 +323,24 @@ TemporaryScope::~TemporaryScope() {
}
Handle<String> Parser::LookupSymbol(int symbol_id,
Vector<const char> string) {
Handle<String> Parser::LookupSymbol(int symbol_id) {
// Length of symbol cache is the number of identified symbols.
// If we are larger than that, or negative, it's not a cached symbol.
// This might also happen if there is no preparser symbol data, even
// if there is some preparser data.
if (static_cast<unsigned>(symbol_id)
>= static_cast<unsigned>(symbol_cache_.length())) {
return Factory::LookupSymbol(string);
if (scanner().is_literal_ascii()) {
return Factory::LookupAsciiSymbol(scanner().literal_ascii_string());
} else {
return Factory::LookupTwoByteSymbol(scanner().literal_uc16_string());
}
}
return LookupCachedSymbol(symbol_id, string);
return LookupCachedSymbol(symbol_id);
}
Handle<String> Parser::LookupCachedSymbol(int symbol_id,
Vector<const char> string) {
Handle<String> Parser::LookupCachedSymbol(int symbol_id) {
// Make sure the cache is large enough to hold the symbol identifier.
if (symbol_cache_.length() <= symbol_id) {
// Increase length to index + 1.
......@@ -347,7 +349,11 @@ Handle<String> Parser::LookupCachedSymbol(int symbol_id,
}
Handle<String> result = symbol_cache_.at(symbol_id);
if (result.is_null()) {
result = Factory::LookupSymbol(string);
if (scanner().is_literal_ascii()) {
result = Factory::LookupAsciiSymbol(scanner().literal_ascii_string());
} else {
result = Factory::LookupTwoByteSymbol(scanner().literal_uc16_string());
}
symbol_cache_.at(symbol_id) = result;
return result;
}
......@@ -615,11 +621,11 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source,
// identical calls.
ExternalTwoByteStringUC16CharacterStream stream(
Handle<ExternalTwoByteString>::cast(source), 0, source->length());
scanner_.Initialize(&stream, JavaScriptScanner::kAllLiterals);
scanner_.Initialize(&stream);
return DoParseProgram(source, in_global_context, &zone_scope);
} else {
GenericStringUC16CharacterStream stream(source, 0, source->length());
scanner_.Initialize(&stream, JavaScriptScanner::kAllLiterals);
scanner_.Initialize(&stream);
return DoParseProgram(source, in_global_context, &zone_scope);
}
}
......@@ -705,7 +711,7 @@ FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info) {
FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info,
UC16CharacterStream* source,
ZoneScope* zone_scope) {
scanner_.Initialize(source, JavaScriptScanner::kAllLiterals);
scanner_.Initialize(source);
ASSERT(target_stack_ == NULL);
Handle<String> name(String::cast(info->name()));
......@@ -757,7 +763,7 @@ Handle<String> Parser::GetSymbol(bool* ok) {
if (pre_data() != NULL) {
symbol_id = pre_data()->GetSymbolIdentifier();
}
return LookupSymbol(symbol_id, scanner().literal());
return LookupSymbol(symbol_id);
}
......@@ -2715,8 +2721,9 @@ Expression* Parser::ParsePrimaryExpression(bool* ok) {
case Token::NUMBER: {
Consume(Token::NUMBER);
double value =
StringToDouble(scanner().literal(), ALLOW_HEX | ALLOW_OCTALS);
ASSERT(scanner().is_literal_ascii());
double value = StringToDouble(scanner().literal_ascii_string(),
ALLOW_HEX | ALLOW_OCTALS);
result = NewNumberLiteral(value);
break;
}
......@@ -3066,8 +3073,9 @@ Expression* Parser::ParseObjectLiteral(bool* ok) {
}
case Token::NUMBER: {
Consume(Token::NUMBER);
double value =
StringToDouble(scanner().literal(), ALLOW_HEX | ALLOW_OCTALS);
ASSERT(scanner().is_literal_ascii());
double value = StringToDouble(scanner().literal_ascii_string(),
ALLOW_HEX | ALLOW_OCTALS);
key = NewNumberLiteral(value);
break;
}
......@@ -3137,11 +3145,9 @@ Expression* Parser::ParseRegExpLiteral(bool seen_equal, bool* ok) {
int literal_index = temp_scope_->NextMaterializedLiteralIndex();
Handle<String> js_pattern =
Factory::NewStringFromUtf8(scanner().next_literal(), TENURED);
Handle<String> js_pattern = NextLiteralString(TENURED);
scanner().ScanRegExpFlags();
Handle<String> js_flags =
Factory::NewStringFromUtf8(scanner().next_literal(), TENURED);
Handle<String> js_flags = NextLiteralString(TENURED);
Next();
return new RegExpLiteral(js_pattern, js_flags, literal_index);
......@@ -3423,10 +3429,10 @@ Handle<String> Parser::ParseIdentifierOrGetOrSet(bool* is_get,
bool* ok) {
Expect(Token::IDENTIFIER, ok);
if (!*ok) return Handle<String>();
if (scanner().literal_length() == 3) {
const char* token = scanner().literal_string();
*is_get = strcmp(token, "get") == 0;
*is_set = !*is_get && strcmp(token, "set") == 0;
if (scanner().is_literal_ascii() && scanner().literal_length() == 3) {
const char* token = scanner().literal_ascii_string().start();
*is_get = strncmp(token, "get", 3) == 0;
*is_set = !*is_get && strncmp(token, "set", 3) == 0;
}
return GetSymbol(ok);
}
......@@ -3604,9 +3610,11 @@ Handle<String> JsonParser::GetString() {
if (literal_length == 0) {
return Factory::empty_string();
}
const char* literal_string = scanner_.literal_string();
Vector<const char> literal(literal_string, literal_length);
return Factory::NewStringFromUtf8(literal);
if (scanner_.is_literal_ascii()) {
return Factory::NewStringFromAscii(scanner_.literal_ascii_string());
} else {
return Factory::NewStringFromTwoByte(scanner_.literal_uc16_string());
}
}
......@@ -3618,7 +3626,8 @@ Handle<Object> JsonParser::ParseJsonValue() {
return GetString();
}
case Token::NUMBER: {
double value = StringToDouble(scanner_.literal(),
ASSERT(scanner_.is_literal_ascii());
double value = StringToDouble(scanner_.literal_ascii_string(),
NO_FLAGS, // Hex, octal or trailing junk.
OS::nan_value());
return Factory::NewNumber(value);
......@@ -4597,10 +4606,9 @@ int ScriptDataImpl::ReadNumber(byte** source) {
// Create a Scanner for the preparser to use as input, and preparse the source.
static ScriptDataImpl* DoPreParse(UC16CharacterStream* source,
bool allow_lazy,
ParserRecorder* recorder,
int literal_flags) {
ParserRecorder* recorder) {
V8JavaScriptScanner scanner;
scanner.Initialize(source, literal_flags);
scanner.Initialize(source);
intptr_t stack_limit = StackGuard::real_climit();
if (!preparser::PreParser::PreParseProgram(&scanner,
recorder,
......@@ -4628,8 +4636,7 @@ ScriptDataImpl* ParserApi::PartialPreParse(UC16CharacterStream* source,
return NULL;
}
PartialParserRecorder recorder;
return DoPreParse(source, allow_lazy, &recorder,
JavaScriptScanner::kNoLiterals);
return DoPreParse(source, allow_lazy, &recorder);
}
......@@ -4638,9 +4645,7 @@ ScriptDataImpl* ParserApi::PreParse(UC16CharacterStream* source,
Handle<Script> no_script;
bool allow_lazy = FLAG_lazy && (extension == NULL);
CompleteParserRecorder recorder;
int kPreParseLiteralsFlags =
JavaScriptScanner::kLiteralString | JavaScriptScanner::kLiteralIdentifier;
return DoPreParse(source, allow_lazy, &recorder, kPreParseLiteralsFlags);
return DoPreParse(source, allow_lazy, &recorder);
}
......
......@@ -578,6 +578,26 @@ class Parser {
bool Check(Token::Value token);
void ExpectSemicolon(bool* ok);
// Creates a string from the scanner's current literal, choosing the ASCII
// or the two-byte factory function according to how the scanner reports
// the literal. |tenured| is forwarded to the factory.
Handle<String> LiteralString(PretenureFlag tenured) {
  if (!scanner().is_literal_ascii()) {
    return Factory::NewStringFromTwoByte(scanner().literal_uc16_string(),
                                         tenured);
  }
  return Factory::NewStringFromAscii(scanner().literal_ascii_string(),
                                     tenured);
}
// Creates a string from the scanner's next literal, choosing the ASCII or
// the two-byte factory function according to how the scanner reports that
// literal. |tenured| is forwarded to the factory.
Handle<String> NextLiteralString(PretenureFlag tenured) {
  if (!scanner().is_next_literal_ascii()) {
    return Factory::NewStringFromTwoByte(scanner().next_literal_uc16_string(),
                                         tenured);
  }
  return Factory::NewStringFromAscii(scanner().next_literal_ascii_string(),
                                     tenured);
}
Handle<String> GetSymbol(bool* ok);
// Get odd-ball literals.
......@@ -612,11 +632,9 @@ class Parser {
Scope* NewScope(Scope* parent, Scope::Type type, bool inside_with);
Handle<String> LookupSymbol(int symbol_id,
Vector<const char> string);
Handle<String> LookupSymbol(int symbol_id);
Handle<String> LookupCachedSymbol(int symbol_id,
Vector<const char> string);
Handle<String> LookupCachedSymbol(int symbol_id);
Expression* NewCall(Expression* expression,
ZoneList<Expression*>* arguments,
......
......@@ -110,26 +110,29 @@ Vector<unsigned> PartialParserRecorder::ExtractData() {
CompleteParserRecorder::CompleteParserRecorder()
: FunctionLoggingParserRecorder(),
literal_chars_(0),
symbol_store_(0),
symbol_entries_(0),
symbol_keys_(0),
symbol_table_(vector_compare),
symbol_id_(0) {
}
void CompleteParserRecorder::LogSymbol(
int start, const char* literal_chars, int length) {
if (!is_recording_) return;
Vector<const char> literal(literal_chars, length);
int hash = vector_hash(literal);
HashMap::Entry* entry = symbol_table_.Lookup(&literal, hash, true);
void CompleteParserRecorder::LogSymbol(int start,
int hash,
bool is_ascii,
Vector<const byte> literal_bytes) {
Key key = { is_ascii, literal_bytes };
HashMap::Entry* entry = symbol_table_.Lookup(&key, hash, true);
int id = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
if (id == 0) {
// Copy literal contents for later comparison.
key.literal_bytes =
Vector<const byte>::cast(literal_chars_.AddBlock(literal_bytes));
// Put (symbol_id_ + 1) into entry and increment it.
id = ++symbol_id_;
entry->value = reinterpret_cast<void*>(id);
Vector<Vector<const char> > symbol = symbol_entries_.AddBlock(1, literal);
Vector<Key> symbol = symbol_keys_.AddBlock(1, key);
entry->key = &symbol[0];
}
WriteNumber(id - 1);
......
......@@ -75,7 +75,8 @@ class ParserRecorder {
int properties) = 0;
// Logs a symbol creation of a literal or identifier.
virtual void LogSymbol(int start, const char* symbol, int length) = 0;
virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) { }
// Logs an error message and marks the log as containing an error.
// Further logging will be ignored, and ExtractData will return a vector
......@@ -165,7 +166,8 @@ class FunctionLoggingParserRecorder : public ParserRecorder {
class PartialParserRecorder : public FunctionLoggingParserRecorder {
public:
PartialParserRecorder() : FunctionLoggingParserRecorder() { }
virtual void LogSymbol(int start, const char* symbol, int length) { }
virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) { }
virtual ~PartialParserRecorder() { }
virtual Vector<unsigned> ExtractData();
virtual int symbol_position() { return 0; }
......@@ -181,7 +183,17 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder {
CompleteParserRecorder();
virtual ~CompleteParserRecorder() { }
virtual void LogSymbol(int start, const char* symbol, int length);
virtual void LogAsciiSymbol(int start, Vector<const char> literal) {
if (!is_recording_) return;
int hash = vector_hash(literal);
LogSymbol(start, hash, true, Vector<const byte>::cast(literal));
}
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) {
if (!is_recording_) return;
int hash = vector_hash(literal);
LogSymbol(start, hash, false, Vector<const byte>::cast(literal));
}
virtual Vector<unsigned> ExtractData();
......@@ -189,10 +201,21 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder {
virtual int symbol_ids() { return symbol_id_; }
private:
static int vector_hash(Vector<const char> string) {
// Key type for the recorder's symbol table: the literal's raw bytes plus a
// flag telling whether those bytes came from an ASCII literal.
struct Key {
// True when the literal was logged as ASCII, false for uc16.
bool is_ascii;
// The literal's contents viewed as bytes.
Vector<const byte> literal_bytes;
};
virtual void LogSymbol(int start,
int hash,
bool is_ascii,
Vector<const byte> literal);
template <typename Char>
static int vector_hash(Vector<const Char> string) {
int hash = 0;
for (int i = 0; i < string.length(); i++) {
int c = string[i];
int c = static_cast<int>(string[i]);
hash += c;
hash += (hash << 10);
hash ^= (hash >> 6);
......@@ -201,18 +224,21 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder {
}
static bool vector_compare(void* a, void* b) {
Vector<const char>* string1 = reinterpret_cast<Vector<const char>* >(a);
Vector<const char>* string2 = reinterpret_cast<Vector<const char>* >(b);
int length = string1->length();
if (string2->length() != length) return false;
return memcmp(string1->start(), string2->start(), length) == 0;
Key* string1 = reinterpret_cast<Key*>(a);
Key* string2 = reinterpret_cast<Key*>(b);
if (string1->is_ascii != string2->is_ascii) return false;
int length = string1->literal_bytes.length();
if (string2->literal_bytes.length() != length) return false;
return memcmp(string1->literal_bytes.start(),
string2->literal_bytes.start(), length) == 0;
}
// Write a non-negative number to the symbol store.
void WriteNumber(int number);
Collector<byte> literal_chars_;
Collector<byte> symbol_store_;
Collector<Vector<const char> > symbol_entries_;
Collector<Key> symbol_keys_;
HashMap symbol_table_;
int symbol_id_;
};
......
......@@ -1121,23 +1121,23 @@ void PreParser::ExpectSemicolon(bool* ok) {
PreParser::Identifier PreParser::GetIdentifierSymbol() {
const char* literal_chars = scanner_->literal_string();
int literal_length = scanner_->literal_length();
int identifier_pos = scanner_->location().beg_pos;
log_->LogSymbol(identifier_pos, literal_chars, literal_length);
return kUnknownExpression;
if (scanner_->is_literal_ascii()) {
log_->LogAsciiSymbol(identifier_pos, scanner_->literal_ascii_string());
} else {
log_->LogUC16Symbol(identifier_pos, scanner_->literal_uc16_string());
}
return kUnknownIdentifier;
}
PreParser::Expression PreParser::GetStringSymbol() {
const char* literal_chars = scanner_->literal_string();
int literal_length = scanner_->literal_length();
int literal_position = scanner_->location().beg_pos;
log_->LogSymbol(literal_position, literal_chars, literal_length);
int identifier_pos = scanner_->location().beg_pos;
if (scanner_->is_literal_ascii()) {
log_->LogAsciiSymbol(identifier_pos, scanner_->literal_ascii_string());
} else {
log_->LogUC16Symbol(identifier_pos, scanner_->literal_uc16_string());
}
return kUnknownExpression;
}
......@@ -1154,7 +1154,8 @@ PreParser::Identifier PreParser::ParseIdentifierName(bool* ok) {
if (i::Token::IsKeyword(next)) {
int pos = scanner_->location().beg_pos;
const char* keyword = i::Token::String(next);
log_->LogSymbol(pos, keyword, i::StrLength(keyword));
log_->LogAsciiSymbol(pos, i::Vector<const char>(keyword,
i::StrLength(keyword)));
return kUnknownExpression;
}
if (next == i::Token::IDENTIFIER) {
......@@ -1173,8 +1174,8 @@ PreParser::Identifier PreParser::ParseIdentifierOrGetOrSet(bool* is_get,
bool* is_set,
bool* ok) {
Expect(i::Token::IDENTIFIER, CHECK_OK);
if (scanner_->literal_length() == 3) {
const char* token = scanner_->literal_string();
if (scanner_->is_literal_ascii() && scanner_->literal_length() == 3) {
const char* token = scanner_->literal_ascii_string().start();
*is_get = strncmp(token, "get", 3) == 0;
*is_set = !*is_get && strncmp(token, "set", 3) == 0;
}
......
......@@ -34,28 +34,6 @@
namespace v8 {
namespace internal {
// ----------------------------------------------------------------------------
// LiteralCollector
// Starts with a buffer of kInitialCapacity and with recording switched off.
LiteralCollector::LiteralCollector()
: buffer_(kInitialCapacity), recording_(false) { }
LiteralCollector::~LiteralCollector() {}
// Appends the UTF-8 encoding of |c| to the buffer. Only meant for characters
// above unibrow::Utf8::kMaxOneByteChar, as the assertion checks.
void LiteralCollector::AddCharSlow(uc32 c) {
ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);
int length = unibrow::Utf8::Length(c);
// Reserve |length| zero-filled chars, then encode directly into them.
Vector<char> block = buffer_.AddBlock(length, '\0');
#ifdef DEBUG
// Debug builds additionally verify that the encoder wrote exactly the
// number of chars that was reserved.
int written_length = unibrow::Utf8::Encode(block.start(), c);
CHECK_EQ(length, written_length);
#else
unibrow::Utf8::Encode(block.start(), c);
#endif
}
// ----------------------------------------------------------------------------
// Character predicates
......@@ -256,7 +234,7 @@ Token::Value JavaScriptScanner::ScanHtmlComment() {
void JavaScriptScanner::Scan() {
next_.literal_chars = Vector<const char>();
next_.literal_chars = NULL;
Token::Value token;
do {
// Remember the position of the next token
......@@ -561,7 +539,7 @@ Token::Value JavaScriptScanner::ScanString() {
uc32 quote = c0_;
Advance(); // consume quote
LiteralScope literal(this, kLiteralString);
LiteralScope literal(this);
while (c0_ != quote && c0_ >= 0
&& !ScannerConstants::kIsLineTerminator.get(c0_)) {
uc32 c = c0_;
......@@ -592,7 +570,7 @@ Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
LiteralScope literal(this, kLiteralNumber);
LiteralScope literal(this);
if (seen_period) {
// we have already seen a decimal point of the float
AddLiteralChar('.');
......@@ -681,7 +659,7 @@ uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
LiteralScope literal(this, kLiteralIdentifier);
LiteralScope literal(this);
KeywordMatcher keyword_match;
// Scan identifier start character.
if (c0_ == '\\') {
......@@ -747,7 +725,7 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
// the scanner should pass uninterpreted bodies to the RegExp
// constructor.
LiteralScope literal(this, kLiteralRegExp);
LiteralScope literal(this);
if (seen_equal)
AddLiteralChar('=');
......@@ -773,7 +751,7 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
bool JavaScriptScanner::ScanRegExpFlags() {
// Scan regular expression flags.
LiteralScope literal(this, kLiteralRegExpFlags);
LiteralScope literal(this);
while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
......
This diff is collapsed.
......@@ -324,10 +324,8 @@ void Scanner::LiteralScope::Complete() {
V8JavaScriptScanner::V8JavaScriptScanner() : JavaScriptScanner() { }
void V8JavaScriptScanner::Initialize(UC16CharacterStream* source,
int literal_flags) {
void V8JavaScriptScanner::Initialize(UC16CharacterStream* source) {
source_ = source;
literal_flags_ = literal_flags | kLiteralIdentifier;
// Need to capture identifiers in order to recognize "get" and "set"
// in object literals.
Init();
......@@ -377,7 +375,7 @@ bool JsonScanner::SkipJsonWhiteSpace() {
void JsonScanner::ScanJson() {
next_.literal_chars = Vector<const char>();
next_.literal_chars = NULL;
Token::Value token;
do {
// Remember the position of the next token
......
......@@ -134,8 +134,7 @@ class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream {
class V8JavaScriptScanner : public JavaScriptScanner {
public:
V8JavaScriptScanner();
void Initialize(UC16CharacterStream* source,
int literal_flags = kAllLiterals);
void Initialize(UC16CharacterStream* source);
};
......
......@@ -530,6 +530,24 @@ class Collector {
}
// Copies the elements of |source| into the collector as one contiguous
// block and returns a vector backed by that block.
// A basic Collector will keep this vector valid as long as the Collector
// is alive.
inline Vector<T> AddBlock(Vector<const T> source) {
  const int count = source.length();
  // Grow when the remaining space in the current chunk cannot hold the
  // whole block.
  const int remaining = current_chunk_.length() - index_;
  if (count > remaining) {
    Grow(count);
  }
  T* destination = current_chunk_.start() + index_;
  index_ += count;
  size_ += count;
  int i = 0;
  while (i < count) {
    destination[i] = source[i];
    i++;
  }
  return Vector<T>(destination, count);
}
// Write the contents of the collector into the provided vector.
void WriteTo(Vector<T> destination) {
ASSERT(size_ <= destination.length());
......
......@@ -573,7 +573,7 @@ void TestStreamScanner(i::UC16CharacterStream* stream,
int skip_pos = 0, // Zero means not skipping.
int skip_to = 0) {
i::V8JavaScriptScanner scanner;
scanner.Initialize(stream, i::JavaScriptScanner::kAllLiterals);
scanner.Initialize(stream);
int i = 0;
do {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment