Commit c9928c05 authored by lrn@chromium.org

Change scanner buffers to not use utf-8.

Make preparser keep its symbol text itself instead of relying on the scanner.

Review URL: http://codereview.chromium.org/6075005

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6115 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 4e32d5ad
...@@ -99,6 +99,14 @@ Handle<String> Factory::LookupSymbol(Vector<const char> string) { ...@@ -99,6 +99,14 @@ Handle<String> Factory::LookupSymbol(Vector<const char> string) {
CALL_HEAP_FUNCTION(Heap::LookupSymbol(string), String); CALL_HEAP_FUNCTION(Heap::LookupSymbol(string), String);
} }
// Handle-returning wrapper around Heap::LookupAsciiSymbol for a vector of
// characters. The heap call is issued through CALL_HEAP_FUNCTION with String
// as the result type.
// NOTE(review): presumably the macro also deals with allocation failure
// (e.g. retrying after a GC) -- confirm against its definition.
Handle<String> Factory::LookupAsciiSymbol(Vector<const char> string) {
CALL_HEAP_FUNCTION(Heap::LookupAsciiSymbol(string), String);
}
// Handle-returning wrapper around Heap::LookupTwoByteSymbol for a vector of
// uc16 code units. The heap call is issued through CALL_HEAP_FUNCTION with
// String as the result type.
// NOTE(review): presumably the macro also deals with allocation failure
// (e.g. retrying after a GC) -- confirm against its definition.
Handle<String> Factory::LookupTwoByteSymbol(Vector<const uc16> string) {
CALL_HEAP_FUNCTION(Heap::LookupTwoByteSymbol(string), String);
}
Handle<String> Factory::NewStringFromAscii(Vector<const char> string, Handle<String> Factory::NewStringFromAscii(Vector<const char> string,
PretenureFlag pretenure) { PretenureFlag pretenure) {
......
...@@ -61,6 +61,8 @@ class Factory : public AllStatic { ...@@ -61,6 +61,8 @@ class Factory : public AllStatic {
PretenureFlag pretenure); PretenureFlag pretenure);
static Handle<String> LookupSymbol(Vector<const char> str); static Handle<String> LookupSymbol(Vector<const char> str);
// Symbol lookups for a vector of chars and for a vector of uc16 code units.
// Both forward to the Heap function of the same name.
static Handle<String> LookupAsciiSymbol(Vector<const char> str);
static Handle<String> LookupTwoByteSymbol(Vector<const uc16> str);
static Handle<String> LookupAsciiSymbol(const char* str) { static Handle<String> LookupAsciiSymbol(const char* str) {
return LookupSymbol(CStrVector(str)); return LookupSymbol(CStrVector(str));
} }
......
...@@ -181,10 +181,6 @@ typedef byte* Address; ...@@ -181,10 +181,6 @@ typedef byte* Address;
#define USING_BSD_ABI #define USING_BSD_ABI
#endif #endif
// Code-point values in Unicode 4.0 are 21 bits wide.
typedef uint16_t uc16;
typedef int32_t uc32;
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Constants // Constants
...@@ -228,6 +224,15 @@ const int kBinary32MinExponent = 0x01; ...@@ -228,6 +224,15 @@ const int kBinary32MinExponent = 0x01;
const int kBinary32MantissaBits = 23; const int kBinary32MantissaBits = 23;
const int kBinary32ExponentShift = 23; const int kBinary32ExponentShift = 23;
// ASCII/UC16 constants
// Code-point values in Unicode 4.0 are 21 bits wide.
// uc16 is a 16-bit unsigned unit; uc32 is a signed 32-bit type, which is
// wide enough for a 21-bit code point.
typedef uint16_t uc16;
typedef int32_t uc32;
// Size of one ASCII character; identical to kCharSize.
const int kASCIISize = kCharSize;
// Size in bytes of one uc16 unit.
const int kUC16Size = sizeof(uc16); // NOLINT
// Largest ASCII character code (0x7f), as a signed uc32 and as an unsigned
// 32-bit value.
const uc32 kMaxAsciiCharCode = 0x7f;
const uint32_t kMaxAsciiCharCodeU = 0x7fu;
// The expression OFFSET_OF(type, field) computes the byte-offset // The expression OFFSET_OF(type, field) computes the byte-offset
// of the specified field relative to the containing type. This // of the specified field relative to the containing type. This
......
...@@ -62,6 +62,71 @@ MaybeObject* Heap::AllocateSymbol(Vector<const char> str, ...@@ -62,6 +62,71 @@ MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
} }
// Allocates a sequential ASCII symbol that holds a copy of the characters in
// |str| and stores |hash_field| as its hash field. Returns an out-of-memory
// failure if |str| is longer than SeqAsciiString::kMaxLength, and passes on
// the failure of the raw allocation if that does not succeed.
MaybeObject* Heap::AllocateAsciiSymbol(Vector<const char> str,
                                       uint32_t hash_field) {
  const int char_count = str.length();
  if (char_count > SeqAsciiString::kMaxLength) {
    return Failure::OutOfMemoryException();
  }

  Map* map = ascii_symbol_map();
  const int size = SeqAsciiString::SizeFor(char_count);

  // Sizes above MaxObjectSizeInPagedSpace() are allocated in lo_space_, all
  // others in old_data_space_.
  MaybeObject* maybe_result;
  if (size > MaxObjectSizeInPagedSpace()) {
    maybe_result = lo_space_->AllocateRaw(size);
  } else {
    maybe_result = old_data_space_->AllocateRaw(size);
  }
  Object* result;
  if (!maybe_result->ToObject(&result)) return maybe_result;

  // Install the map, then fill in the string header fields.
  reinterpret_cast<HeapObject*>(result)->set_map(map);
  String* answer = String::cast(result);
  answer->set_length(char_count);
  answer->set_hash_field(hash_field);
  ASSERT_EQ(size, answer->Size());

  // Copy the characters in right behind the header.
  memcpy(answer->address() + SeqAsciiString::kHeaderSize,
         str.start(),
         char_count);
  return answer;
}
// Allocates a sequential two-byte symbol that holds a copy of the uc16 units
// in |str| and stores |hash_field| as its hash field. Returns an
// out-of-memory failure if |str| is longer than SeqTwoByteString::kMaxLength,
// and passes on the failure of the raw allocation if that does not succeed.
MaybeObject* Heap::AllocateTwoByteSymbol(Vector<const uc16> str,
                                         uint32_t hash_field) {
  const int char_count = str.length();
  if (char_count > SeqTwoByteString::kMaxLength) {
    return Failure::OutOfMemoryException();
  }

  Map* map = symbol_map();
  const int size = SeqTwoByteString::SizeFor(char_count);

  // Sizes above MaxObjectSizeInPagedSpace() are allocated in lo_space_, all
  // others in old_data_space_.
  MaybeObject* maybe_result;
  if (size > MaxObjectSizeInPagedSpace()) {
    maybe_result = lo_space_->AllocateRaw(size);
  } else {
    maybe_result = old_data_space_->AllocateRaw(size);
  }
  Object* result;
  if (!maybe_result->ToObject(&result)) return maybe_result;

  // Install the map, then fill in the string header fields.
  reinterpret_cast<HeapObject*>(result)->set_map(map);
  String* answer = String::cast(result);
  answer->set_length(char_count);
  answer->set_hash_field(hash_field);
  ASSERT_EQ(size, answer->Size());

  // Copy the characters in right behind the header; each one occupies
  // kUC16Size bytes.
  memcpy(answer->address() + SeqTwoByteString::kHeaderSize,
         str.start(),
         char_count * kUC16Size);
  return answer;
}
MaybeObject* Heap::CopyFixedArray(FixedArray* src) { MaybeObject* Heap::CopyFixedArray(FixedArray* src) {
return CopyFixedArrayWithMap(src, src->map()); return CopyFixedArrayWithMap(src, src->map());
} }
......
...@@ -4013,6 +4013,36 @@ MaybeObject* Heap::LookupSymbol(Vector<const char> string) { ...@@ -4013,6 +4013,36 @@ MaybeObject* Heap::LookupSymbol(Vector<const char> string) {
} }
// Looks |string| up in the symbol table via SymbolTable::LookupAsciiSymbol.
// On success the table object returned by the lookup is stored as the symbol
// table root and the symbol is returned; a failed lookup is returned as is.
MaybeObject* Heap::LookupAsciiSymbol(Vector<const char> string) {
  Object* symbol = NULL;
  MaybeObject* maybe_table =
      symbol_table()->LookupAsciiSymbol(string, &symbol);
  Object* table;
  if (!maybe_table->ToObject(&table)) return maybe_table;

  // Can't use set_symbol_table because SymbolTable::cast knows that
  // SymbolTable is a singleton and checks for identity.
  roots_[kSymbolTableRootIndex] = table;
  ASSERT(symbol != NULL);
  return symbol;
}
// Looks |string| up in the symbol table via SymbolTable::LookupTwoByteSymbol.
// On success the table object returned by the lookup is stored as the symbol
// table root and the symbol is returned; a failed lookup is returned as is.
MaybeObject* Heap::LookupTwoByteSymbol(Vector<const uc16> string) {
  Object* symbol = NULL;
  MaybeObject* maybe_table =
      symbol_table()->LookupTwoByteSymbol(string, &symbol);
  Object* table;
  if (!maybe_table->ToObject(&table)) return maybe_table;

  // Can't use set_symbol_table because SymbolTable::cast knows that
  // SymbolTable is a singleton and checks for identity.
  roots_[kSymbolTableRootIndex] = table;
  ASSERT(symbol != NULL);
  return symbol;
}
MaybeObject* Heap::LookupSymbol(String* string) { MaybeObject* Heap::LookupSymbol(String* string) {
if (string->IsSymbol()) return string; if (string->IsSymbol()) return string;
Object* symbol = NULL; Object* symbol = NULL;
......
...@@ -431,6 +431,14 @@ class Heap : public AllStatic { ...@@ -431,6 +431,14 @@ class Heap : public AllStatic {
int chars, int chars,
uint32_t hash_field); uint32_t hash_field);
// Allocates a sequential ASCII symbol containing the characters of |str|,
// with |hash_field| stored as its hash field. The result is a failure rather
// than a symbol when |str| exceeds SeqAsciiString::kMaxLength or when the
// raw allocation fails.
MUST_USE_RESULT static inline MaybeObject* AllocateAsciiSymbol(
Vector<const char> str,
uint32_t hash_field);
// Allocates a sequential two-byte symbol containing the uc16 units of |str|,
// with |hash_field| stored as its hash field. The result is a failure rather
// than a symbol when |str| exceeds SeqTwoByteString::kMaxLength or when the
// raw allocation fails.
MUST_USE_RESULT static inline MaybeObject* AllocateTwoByteSymbol(
Vector<const uc16> str,
uint32_t hash_field);
MUST_USE_RESULT static MaybeObject* AllocateInternalSymbol( MUST_USE_RESULT static MaybeObject* AllocateInternalSymbol(
unibrow::CharacterStream* buffer, int chars, uint32_t hash_field); unibrow::CharacterStream* buffer, int chars, uint32_t hash_field);
...@@ -686,6 +694,9 @@ class Heap : public AllStatic { ...@@ -686,6 +694,9 @@ class Heap : public AllStatic {
// failed. // failed.
// Please note this function does not perform a garbage collection. // Please note this function does not perform a garbage collection.
MUST_USE_RESULT static MaybeObject* LookupSymbol(Vector<const char> str); MUST_USE_RESULT static MaybeObject* LookupSymbol(Vector<const char> str);
// Symbol table lookups for a vector of chars and for a vector of uc16 code
// units. Each one queries the symbol table with the SymbolTable function of
// the same name, stores the table object that comes back as the symbol table
// root and returns the symbol; a failure from the table lookup is returned
// unchanged.
MUST_USE_RESULT static MaybeObject* LookupAsciiSymbol(Vector<const char> str);
MUST_USE_RESULT static MaybeObject* LookupTwoByteSymbol(
Vector<const uc16> str);
MUST_USE_RESULT static MaybeObject* LookupAsciiSymbol(const char* str) { MUST_USE_RESULT static MaybeObject* LookupAsciiSymbol(const char* str) {
return LookupSymbol(CStrVector(str)); return LookupSymbol(CStrVector(str));
} }
......
...@@ -5143,6 +5143,26 @@ bool String::IsEqualTo(Vector<const char> str) { ...@@ -5143,6 +5143,26 @@ bool String::IsEqualTo(Vector<const char> str) {
} }
// Returns true if this string has the same length as |str| and every
// character equals the corresponding char of |str| converted to uint16_t.
bool String::IsAsciiEqualTo(Vector<const char> str) {
  const int char_count = length();
  if (str.length() != char_count) return false;
  int index = 0;
  while (index < char_count) {
    if (Get(index) != static_cast<uint16_t>(str[index])) return false;
    index++;
  }
  return true;
}
// Returns true if this string has the same length as |str| and every
// character equals the corresponding uc16 unit of |str|.
bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
  const int char_count = length();
  if (str.length() != char_count) return false;
  int index = 0;
  while (index < char_count) {
    if (Get(index) != str[index]) return false;
    index++;
  }
  return true;
}
template <typename schar> template <typename schar>
static inline uint32_t HashSequentialString(const schar* chars, int length) { static inline uint32_t HashSequentialString(const schar* chars, int length) {
StringHasher hasher(length); StringHasher hasher(length);
...@@ -8086,6 +8106,85 @@ class Utf8SymbolKey : public HashTableKey { ...@@ -8086,6 +8106,85 @@ class Utf8SymbolKey : public HashTableKey {
}; };
// Common base of the hash table keys that wrap a flat vector of characters
// of type Char. It implements the hashing; IsMatch and AsObject are left to
// the subclasses.
template <typename Char>
class SequentialSymbolKey : public HashTableKey {
 public:
  explicit SequentialSymbolKey(Vector<const Char> string)
      : string_(string), hash_field_(0) { }

  // Computes the hash field of string_, remembers it in hash_field_ and
  // returns the hash value extracted from it.
  uint32_t Hash() {
    const int char_count = string_.length();
    StringHasher hasher(char_count);
    // Very long strings have a trivial hash that doesn't inspect the
    // string contents, so characters are only fed in for the other ones.
    if (!hasher.has_trivial_hash()) {
      int pos = 0;
      // Keep the iterative array index computation going for as long as
      // the string may still turn out to be an array index.
      for (; pos < char_count && hasher.is_array_index(); pos++) {
        hasher.AddCharacter(static_cast<uc32>(string_[pos]));
      }
      // Whatever is left is hashed without updating the array index.
      for (; pos < char_count; pos++) {
        hasher.AddCharacterNoIndex(static_cast<uc32>(string_[pos]));
      }
    }
    hash_field_ = hasher.GetHashField();

    uint32_t result = hash_field_ >> String::kHashShift;
    ASSERT(result != 0);  // Ensure that the hash value of 0 is never computed.
    return result;
  }

  // Hash of an object already stored in the table: the String's own hash.
  uint32_t HashForObject(Object* other) {
    String* other_string = String::cast(other);
    return other_string->Hash();
  }

  Vector<const Char> string_;
  uint32_t hash_field_;  // 0 until Hash() has stored a value.
};
// Symbol table key for a vector of chars.
class AsciiSymbolKey : public SequentialSymbolKey<char> {
 public:
  explicit AsciiSymbolKey(Vector<const char> str)
      : SequentialSymbolKey<char>(str) { }

  // True if |string| is a String equal to the wrapped characters.
  bool IsMatch(Object* string) {
    String* candidate = String::cast(string);
    return candidate->IsAsciiEqualTo(string_);
  }

  // Allocates the symbol for this key. Hash() is called first if
  // hash_field_ is still 0, since it is what fills hash_field_ in.
  MaybeObject* AsObject() {
    if (hash_field_ == 0) {
      Hash();
    }
    return Heap::AllocateAsciiSymbol(string_, hash_field_);
  }
};
// Symbol table key for a vector of uc16 code units.
class TwoByteSymbolKey : public SequentialSymbolKey<uc16> {
 public:
  explicit TwoByteSymbolKey(Vector<const uc16> str)
      : SequentialSymbolKey<uc16>(str) { }

  // True if |string| is a String equal to the wrapped characters.
  bool IsMatch(Object* string) {
    String* candidate = String::cast(string);
    return candidate->IsTwoByteEqualTo(string_);
  }

  // Allocates the symbol for this key. Hash() is called first if
  // hash_field_ is still 0, since it is what fills hash_field_ in.
  MaybeObject* AsObject() {
    if (hash_field_ == 0) {
      Hash();
    }
    return Heap::AllocateTwoByteSymbol(string_, hash_field_);
  }
};
// SymbolKey carries a string/symbol object as key. // SymbolKey carries a string/symbol object as key.
class SymbolKey : public HashTableKey { class SymbolKey : public HashTableKey {
public: public:
...@@ -8830,6 +8929,19 @@ MaybeObject* SymbolTable::LookupSymbol(Vector<const char> str, Object** s) { ...@@ -8830,6 +8929,19 @@ MaybeObject* SymbolTable::LookupSymbol(Vector<const char> str, Object** s) {
} }
// Looks up the symbol for the chars in |str| by wrapping them in an
// AsciiSymbolKey and handing that key to LookupKey, which receives |s| for
// the symbol.
MaybeObject* SymbolTable::LookupAsciiSymbol(Vector<const char> str,
                                            Object** s) {
  AsciiSymbolKey ascii_key(str);
  HashTableKey* key = &ascii_key;
  return LookupKey(key, s);
}
// Looks up the symbol for the uc16 units in |str| by wrapping them in a
// TwoByteSymbolKey and handing that key to LookupKey, which receives |s| for
// the symbol.
MaybeObject* SymbolTable::LookupTwoByteSymbol(Vector<const uc16> str,
                                              Object** s) {
  TwoByteSymbolKey two_byte_key(str);
  HashTableKey* key = &two_byte_key;
  return LookupKey(key, s);
}
MaybeObject* SymbolTable::LookupKey(HashTableKey* key, Object** s) { MaybeObject* SymbolTable::LookupKey(HashTableKey* key, Object** s) {
int entry = FindEntry(key); int entry = FindEntry(key);
......
...@@ -2327,6 +2327,10 @@ class SymbolTable: public HashTable<SymbolTableShape, HashTableKey*> { ...@@ -2327,6 +2327,10 @@ class SymbolTable: public HashTable<SymbolTableShape, HashTableKey*> {
// been enlarged. If the return value is not a failure, the symbol // been enlarged. If the return value is not a failure, the symbol
// pointer *s is set to the symbol found. // pointer *s is set to the symbol found.
MUST_USE_RESULT MaybeObject* LookupSymbol(Vector<const char> str, Object** s); MUST_USE_RESULT MaybeObject* LookupSymbol(Vector<const char> str, Object** s);
// Lookups keyed by a vector of chars and by a vector of uc16 code units.
// Each builds the matching key (AsciiSymbolKey / TwoByteSymbolKey) and
// delegates to LookupKey, passing |s| along for the symbol.
MUST_USE_RESULT MaybeObject* LookupAsciiSymbol(Vector<const char> str,
Object** s);
MUST_USE_RESULT MaybeObject* LookupTwoByteSymbol(Vector<const uc16> str,
Object** s);
MUST_USE_RESULT MaybeObject* LookupString(String* key, Object** s); MUST_USE_RESULT MaybeObject* LookupString(String* key, Object** s);
// Looks up a symbol that is equal to the given string and returns // Looks up a symbol that is equal to the given string and returns
...@@ -5074,6 +5078,8 @@ class String: public HeapObject { ...@@ -5074,6 +5078,8 @@ class String: public HeapObject {
// String equality operations. // String equality operations.
inline bool Equals(String* other); inline bool Equals(String* other);
bool IsEqualTo(Vector<const char> str); bool IsEqualTo(Vector<const char> str);
// Compare this string against a vector of chars / uc16 code units. The
// result is true only if the lengths agree and every character matches.
bool IsAsciiEqualTo(Vector<const char> str);
bool IsTwoByteEqualTo(Vector<const uc16> str);
// Return a UTF8 representation of the string. The string is null // Return a UTF8 representation of the string. The string is null
// terminated but may optionally contain nulls. Length is returned // terminated but may optionally contain nulls. Length is returned
......
...@@ -323,22 +323,24 @@ TemporaryScope::~TemporaryScope() { ...@@ -323,22 +323,24 @@ TemporaryScope::~TemporaryScope() {
} }
Handle<String> Parser::LookupSymbol(int symbol_id, Handle<String> Parser::LookupSymbol(int symbol_id) {
Vector<const char> string) {
// Length of symbol cache is the number of identified symbols. // Length of symbol cache is the number of identified symbols.
// If we are larger than that, or negative, it's not a cached symbol. // If we are larger than that, or negative, it's not a cached symbol.
// This might also happen if there is no preparser symbol data, even // This might also happen if there is no preparser symbol data, even
// if there is some preparser data. // if there is some preparser data.
if (static_cast<unsigned>(symbol_id) if (static_cast<unsigned>(symbol_id)
>= static_cast<unsigned>(symbol_cache_.length())) { >= static_cast<unsigned>(symbol_cache_.length())) {
return Factory::LookupSymbol(string); if (scanner().is_literal_ascii()) {
return Factory::LookupAsciiSymbol(scanner().literal_ascii_string());
} else {
return Factory::LookupTwoByteSymbol(scanner().literal_uc16_string());
}
} }
return LookupCachedSymbol(symbol_id, string); return LookupCachedSymbol(symbol_id);
} }
Handle<String> Parser::LookupCachedSymbol(int symbol_id, Handle<String> Parser::LookupCachedSymbol(int symbol_id) {
Vector<const char> string) {
// Make sure the cache is large enough to hold the symbol identifier. // Make sure the cache is large enough to hold the symbol identifier.
if (symbol_cache_.length() <= symbol_id) { if (symbol_cache_.length() <= symbol_id) {
// Increase length to index + 1. // Increase length to index + 1.
...@@ -347,7 +349,11 @@ Handle<String> Parser::LookupCachedSymbol(int symbol_id, ...@@ -347,7 +349,11 @@ Handle<String> Parser::LookupCachedSymbol(int symbol_id,
} }
Handle<String> result = symbol_cache_.at(symbol_id); Handle<String> result = symbol_cache_.at(symbol_id);
if (result.is_null()) { if (result.is_null()) {
result = Factory::LookupSymbol(string); if (scanner().is_literal_ascii()) {
result = Factory::LookupAsciiSymbol(scanner().literal_ascii_string());
} else {
result = Factory::LookupTwoByteSymbol(scanner().literal_uc16_string());
}
symbol_cache_.at(symbol_id) = result; symbol_cache_.at(symbol_id) = result;
return result; return result;
} }
...@@ -615,11 +621,11 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source, ...@@ -615,11 +621,11 @@ FunctionLiteral* Parser::ParseProgram(Handle<String> source,
// identical calls. // identical calls.
ExternalTwoByteStringUC16CharacterStream stream( ExternalTwoByteStringUC16CharacterStream stream(
Handle<ExternalTwoByteString>::cast(source), 0, source->length()); Handle<ExternalTwoByteString>::cast(source), 0, source->length());
scanner_.Initialize(&stream, JavaScriptScanner::kAllLiterals); scanner_.Initialize(&stream);
return DoParseProgram(source, in_global_context, &zone_scope); return DoParseProgram(source, in_global_context, &zone_scope);
} else { } else {
GenericStringUC16CharacterStream stream(source, 0, source->length()); GenericStringUC16CharacterStream stream(source, 0, source->length());
scanner_.Initialize(&stream, JavaScriptScanner::kAllLiterals); scanner_.Initialize(&stream);
return DoParseProgram(source, in_global_context, &zone_scope); return DoParseProgram(source, in_global_context, &zone_scope);
} }
} }
...@@ -705,7 +711,7 @@ FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info) { ...@@ -705,7 +711,7 @@ FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info) {
FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info, FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info,
UC16CharacterStream* source, UC16CharacterStream* source,
ZoneScope* zone_scope) { ZoneScope* zone_scope) {
scanner_.Initialize(source, JavaScriptScanner::kAllLiterals); scanner_.Initialize(source);
ASSERT(target_stack_ == NULL); ASSERT(target_stack_ == NULL);
Handle<String> name(String::cast(info->name())); Handle<String> name(String::cast(info->name()));
...@@ -757,7 +763,7 @@ Handle<String> Parser::GetSymbol(bool* ok) { ...@@ -757,7 +763,7 @@ Handle<String> Parser::GetSymbol(bool* ok) {
if (pre_data() != NULL) { if (pre_data() != NULL) {
symbol_id = pre_data()->GetSymbolIdentifier(); symbol_id = pre_data()->GetSymbolIdentifier();
} }
return LookupSymbol(symbol_id, scanner().literal()); return LookupSymbol(symbol_id);
} }
...@@ -2715,8 +2721,9 @@ Expression* Parser::ParsePrimaryExpression(bool* ok) { ...@@ -2715,8 +2721,9 @@ Expression* Parser::ParsePrimaryExpression(bool* ok) {
case Token::NUMBER: { case Token::NUMBER: {
Consume(Token::NUMBER); Consume(Token::NUMBER);
double value = ASSERT(scanner().is_literal_ascii());
StringToDouble(scanner().literal(), ALLOW_HEX | ALLOW_OCTALS); double value = StringToDouble(scanner().literal_ascii_string(),
ALLOW_HEX | ALLOW_OCTALS);
result = NewNumberLiteral(value); result = NewNumberLiteral(value);
break; break;
} }
...@@ -3066,8 +3073,9 @@ Expression* Parser::ParseObjectLiteral(bool* ok) { ...@@ -3066,8 +3073,9 @@ Expression* Parser::ParseObjectLiteral(bool* ok) {
} }
case Token::NUMBER: { case Token::NUMBER: {
Consume(Token::NUMBER); Consume(Token::NUMBER);
double value = ASSERT(scanner().is_literal_ascii());
StringToDouble(scanner().literal(), ALLOW_HEX | ALLOW_OCTALS); double value = StringToDouble(scanner().literal_ascii_string(),
ALLOW_HEX | ALLOW_OCTALS);
key = NewNumberLiteral(value); key = NewNumberLiteral(value);
break; break;
} }
...@@ -3137,11 +3145,9 @@ Expression* Parser::ParseRegExpLiteral(bool seen_equal, bool* ok) { ...@@ -3137,11 +3145,9 @@ Expression* Parser::ParseRegExpLiteral(bool seen_equal, bool* ok) {
int literal_index = temp_scope_->NextMaterializedLiteralIndex(); int literal_index = temp_scope_->NextMaterializedLiteralIndex();
Handle<String> js_pattern = Handle<String> js_pattern = NextLiteralString(TENURED);
Factory::NewStringFromUtf8(scanner().next_literal(), TENURED);
scanner().ScanRegExpFlags(); scanner().ScanRegExpFlags();
Handle<String> js_flags = Handle<String> js_flags = NextLiteralString(TENURED);
Factory::NewStringFromUtf8(scanner().next_literal(), TENURED);
Next(); Next();
return new RegExpLiteral(js_pattern, js_flags, literal_index); return new RegExpLiteral(js_pattern, js_flags, literal_index);
...@@ -3423,10 +3429,10 @@ Handle<String> Parser::ParseIdentifierOrGetOrSet(bool* is_get, ...@@ -3423,10 +3429,10 @@ Handle<String> Parser::ParseIdentifierOrGetOrSet(bool* is_get,
bool* ok) { bool* ok) {
Expect(Token::IDENTIFIER, ok); Expect(Token::IDENTIFIER, ok);
if (!*ok) return Handle<String>(); if (!*ok) return Handle<String>();
if (scanner().literal_length() == 3) { if (scanner().is_literal_ascii() && scanner().literal_length() == 3) {
const char* token = scanner().literal_string(); const char* token = scanner().literal_ascii_string().start();
*is_get = strcmp(token, "get") == 0; *is_get = strncmp(token, "get", 3) == 0;
*is_set = !*is_get && strcmp(token, "set") == 0; *is_set = !*is_get && strncmp(token, "set", 3) == 0;
} }
return GetSymbol(ok); return GetSymbol(ok);
} }
...@@ -3604,9 +3610,11 @@ Handle<String> JsonParser::GetString() { ...@@ -3604,9 +3610,11 @@ Handle<String> JsonParser::GetString() {
if (literal_length == 0) { if (literal_length == 0) {
return Factory::empty_string(); return Factory::empty_string();
} }
const char* literal_string = scanner_.literal_string(); if (scanner_.is_literal_ascii()) {
Vector<const char> literal(literal_string, literal_length); return Factory::NewStringFromAscii(scanner_.literal_ascii_string());
return Factory::NewStringFromUtf8(literal); } else {
return Factory::NewStringFromTwoByte(scanner_.literal_uc16_string());
}
} }
...@@ -3618,7 +3626,8 @@ Handle<Object> JsonParser::ParseJsonValue() { ...@@ -3618,7 +3626,8 @@ Handle<Object> JsonParser::ParseJsonValue() {
return GetString(); return GetString();
} }
case Token::NUMBER: { case Token::NUMBER: {
double value = StringToDouble(scanner_.literal(), ASSERT(scanner_.is_literal_ascii());
double value = StringToDouble(scanner_.literal_ascii_string(),
NO_FLAGS, // Hex, octal or trailing junk. NO_FLAGS, // Hex, octal or trailing junk.
OS::nan_value()); OS::nan_value());
return Factory::NewNumber(value); return Factory::NewNumber(value);
...@@ -4597,10 +4606,9 @@ int ScriptDataImpl::ReadNumber(byte** source) { ...@@ -4597,10 +4606,9 @@ int ScriptDataImpl::ReadNumber(byte** source) {
// Create a Scanner for the preparser to use as input, and preparse the source. // Create a Scanner for the preparser to use as input, and preparse the source.
static ScriptDataImpl* DoPreParse(UC16CharacterStream* source, static ScriptDataImpl* DoPreParse(UC16CharacterStream* source,
bool allow_lazy, bool allow_lazy,
ParserRecorder* recorder, ParserRecorder* recorder) {
int literal_flags) {
V8JavaScriptScanner scanner; V8JavaScriptScanner scanner;
scanner.Initialize(source, literal_flags); scanner.Initialize(source);
intptr_t stack_limit = StackGuard::real_climit(); intptr_t stack_limit = StackGuard::real_climit();
if (!preparser::PreParser::PreParseProgram(&scanner, if (!preparser::PreParser::PreParseProgram(&scanner,
recorder, recorder,
...@@ -4628,8 +4636,7 @@ ScriptDataImpl* ParserApi::PartialPreParse(UC16CharacterStream* source, ...@@ -4628,8 +4636,7 @@ ScriptDataImpl* ParserApi::PartialPreParse(UC16CharacterStream* source,
return NULL; return NULL;
} }
PartialParserRecorder recorder; PartialParserRecorder recorder;
return DoPreParse(source, allow_lazy, &recorder, return DoPreParse(source, allow_lazy, &recorder);
JavaScriptScanner::kNoLiterals);
} }
...@@ -4638,9 +4645,7 @@ ScriptDataImpl* ParserApi::PreParse(UC16CharacterStream* source, ...@@ -4638,9 +4645,7 @@ ScriptDataImpl* ParserApi::PreParse(UC16CharacterStream* source,
Handle<Script> no_script; Handle<Script> no_script;
bool allow_lazy = FLAG_lazy && (extension == NULL); bool allow_lazy = FLAG_lazy && (extension == NULL);
CompleteParserRecorder recorder; CompleteParserRecorder recorder;
int kPreParseLiteralsFlags = return DoPreParse(source, allow_lazy, &recorder);
JavaScriptScanner::kLiteralString | JavaScriptScanner::kLiteralIdentifier;
return DoPreParse(source, allow_lazy, &recorder, kPreParseLiteralsFlags);
} }
......
...@@ -578,6 +578,26 @@ class Parser { ...@@ -578,6 +578,26 @@ class Parser {
bool Check(Token::Value token); bool Check(Token::Value token);
void ExpectSemicolon(bool* ok); void ExpectSemicolon(bool* ok);
// Creates a string from the scanner's current literal, using the ASCII or
// the two-byte factory function depending on what the scanner reports for
// the literal. |tenured| is passed through to the factory.
Handle<String> LiteralString(PretenureFlag tenured) {
  if (!scanner().is_literal_ascii()) {
    return Factory::NewStringFromTwoByte(scanner().literal_uc16_string(),
                                         tenured);
  }
  return Factory::NewStringFromAscii(scanner().literal_ascii_string(),
                                     tenured);
}
// Creates a string from the scanner's next literal, using the ASCII or the
// two-byte factory function depending on what the scanner reports for that
// literal. |tenured| is passed through to the factory.
Handle<String> NextLiteralString(PretenureFlag tenured) {
  if (!scanner().is_next_literal_ascii()) {
    return Factory::NewStringFromTwoByte(scanner().next_literal_uc16_string(),
                                         tenured);
  }
  return Factory::NewStringFromAscii(scanner().next_literal_ascii_string(),
                                     tenured);
}
Handle<String> GetSymbol(bool* ok); Handle<String> GetSymbol(bool* ok);
// Get odd-ball literals. // Get odd-ball literals.
...@@ -612,11 +632,9 @@ class Parser { ...@@ -612,11 +632,9 @@ class Parser {
Scope* NewScope(Scope* parent, Scope::Type type, bool inside_with); Scope* NewScope(Scope* parent, Scope::Type type, bool inside_with);
Handle<String> LookupSymbol(int symbol_id, Handle<String> LookupSymbol(int symbol_id);
Vector<const char> string);
Handle<String> LookupCachedSymbol(int symbol_id, Handle<String> LookupCachedSymbol(int symbol_id);
Vector<const char> string);
Expression* NewCall(Expression* expression, Expression* NewCall(Expression* expression,
ZoneList<Expression*>* arguments, ZoneList<Expression*>* arguments,
......
...@@ -110,26 +110,29 @@ Vector<unsigned> PartialParserRecorder::ExtractData() { ...@@ -110,26 +110,29 @@ Vector<unsigned> PartialParserRecorder::ExtractData() {
CompleteParserRecorder::CompleteParserRecorder() CompleteParserRecorder::CompleteParserRecorder()
: FunctionLoggingParserRecorder(), : FunctionLoggingParserRecorder(),
literal_chars_(0),
symbol_store_(0), symbol_store_(0),
symbol_entries_(0), symbol_keys_(0),
symbol_table_(vector_compare), symbol_table_(vector_compare),
symbol_id_(0) { symbol_id_(0) {
} }
void CompleteParserRecorder::LogSymbol( void CompleteParserRecorder::LogSymbol(int start,
int start, const char* literal_chars, int length) { int hash,
if (!is_recording_) return; bool is_ascii,
Vector<const byte> literal_bytes) {
Vector<const char> literal(literal_chars, length); Key key = { is_ascii, literal_bytes };
int hash = vector_hash(literal); HashMap::Entry* entry = symbol_table_.Lookup(&key, hash, true);
HashMap::Entry* entry = symbol_table_.Lookup(&literal, hash, true);
int id = static_cast<int>(reinterpret_cast<intptr_t>(entry->value)); int id = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
if (id == 0) { if (id == 0) {
// Copy literal contents for later comparison.
key.literal_bytes =
Vector<const byte>::cast(literal_chars_.AddBlock(literal_bytes));
// Put (symbol_id_ + 1) into entry and increment it. // Put (symbol_id_ + 1) into entry and increment it.
id = ++symbol_id_; id = ++symbol_id_;
entry->value = reinterpret_cast<void*>(id); entry->value = reinterpret_cast<void*>(id);
Vector<Vector<const char> > symbol = symbol_entries_.AddBlock(1, literal); Vector<Key> symbol = symbol_keys_.AddBlock(1, key);
entry->key = &symbol[0]; entry->key = &symbol[0];
} }
WriteNumber(id - 1); WriteNumber(id - 1);
......
...@@ -75,7 +75,8 @@ class ParserRecorder { ...@@ -75,7 +75,8 @@ class ParserRecorder {
int properties) = 0; int properties) = 0;
// Logs a symbol creation of a literal or identifier. // Logs a symbol creation of a literal or identifier.
virtual void LogSymbol(int start, const char* symbol, int length) = 0; virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) { }
// Logs an error message and marks the log as containing an error. // Logs an error message and marks the log as containing an error.
// Further logging will be ignored, and ExtractData will return a vector // Further logging will be ignored, and ExtractData will return a vector
...@@ -165,7 +166,8 @@ class FunctionLoggingParserRecorder : public ParserRecorder { ...@@ -165,7 +166,8 @@ class FunctionLoggingParserRecorder : public ParserRecorder {
class PartialParserRecorder : public FunctionLoggingParserRecorder { class PartialParserRecorder : public FunctionLoggingParserRecorder {
public: public:
PartialParserRecorder() : FunctionLoggingParserRecorder() { } PartialParserRecorder() : FunctionLoggingParserRecorder() { }
virtual void LogSymbol(int start, const char* symbol, int length) { } virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) { }
virtual ~PartialParserRecorder() { } virtual ~PartialParserRecorder() { }
virtual Vector<unsigned> ExtractData(); virtual Vector<unsigned> ExtractData();
virtual int symbol_position() { return 0; } virtual int symbol_position() { return 0; }
...@@ -181,7 +183,17 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder { ...@@ -181,7 +183,17 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder {
CompleteParserRecorder(); CompleteParserRecorder();
virtual ~CompleteParserRecorder() { } virtual ~CompleteParserRecorder() { }
virtual void LogSymbol(int start, const char* symbol, int length); virtual void LogAsciiSymbol(int start, Vector<const char> literal) {
if (!is_recording_) return;
int hash = vector_hash(literal);
LogSymbol(start, hash, true, Vector<const byte>::cast(literal));
}
// Logs a symbol given as uc16 code units. Does nothing unless recording is
// on; otherwise the literal is hashed and handed to LogSymbol as raw bytes
// with is_ascii set to false.
virtual void LogUC16Symbol(int start, Vector<const uc16> literal) {
  if (is_recording_) {
    const int hash = vector_hash(literal);
    LogSymbol(start, hash, false, Vector<const byte>::cast(literal));
  }
}
virtual Vector<unsigned> ExtractData(); virtual Vector<unsigned> ExtractData();
...@@ -189,10 +201,21 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder { ...@@ -189,10 +201,21 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder {
virtual int symbol_ids() { return symbol_id_; } virtual int symbol_ids() { return symbol_id_; }
private: private:
static int vector_hash(Vector<const char> string) { struct Key {
bool is_ascii;
Vector<const byte> literal_bytes;
};
virtual void LogSymbol(int start,
int hash,
bool is_ascii,
Vector<const byte> literal);
template <typename Char>
static int vector_hash(Vector<const Char> string) {
int hash = 0; int hash = 0;
for (int i = 0; i < string.length(); i++) { for (int i = 0; i < string.length(); i++) {
int c = string[i]; int c = static_cast<int>(string[i]);
hash += c; hash += c;
hash += (hash << 10); hash += (hash << 10);
hash ^= (hash >> 6); hash ^= (hash >> 6);
...@@ -201,18 +224,21 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder { ...@@ -201,18 +224,21 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder {
} }
static bool vector_compare(void* a, void* b) { static bool vector_compare(void* a, void* b) {
Vector<const char>* string1 = reinterpret_cast<Vector<const char>* >(a); Key* string1 = reinterpret_cast<Key*>(a);
Vector<const char>* string2 = reinterpret_cast<Vector<const char>* >(b); Key* string2 = reinterpret_cast<Key*>(b);
int length = string1->length(); if (string1->is_ascii != string2->is_ascii) return false;
if (string2->length() != length) return false; int length = string1->literal_bytes.length();
return memcmp(string1->start(), string2->start(), length) == 0; if (string2->literal_bytes.length() != length) return false;
return memcmp(string1->literal_bytes.start(),
string2->literal_bytes.start(), length) == 0;
} }
// Write a non-negative number to the symbol store. // Write a non-negative number to the symbol store.
void WriteNumber(int number); void WriteNumber(int number);
Collector<byte> literal_chars_;
Collector<byte> symbol_store_; Collector<byte> symbol_store_;
Collector<Vector<const char> > symbol_entries_; Collector<Key> symbol_keys_;
HashMap symbol_table_; HashMap symbol_table_;
int symbol_id_; int symbol_id_;
}; };
......
...@@ -1121,23 +1121,23 @@ void PreParser::ExpectSemicolon(bool* ok) { ...@@ -1121,23 +1121,23 @@ void PreParser::ExpectSemicolon(bool* ok) {
// Logs the literal currently held by the scanner as a symbol, keyed by the
// token's start position (scanner_->location().beg_pos).
// Each line below carries both sides of the change:
//  - earlier side: passes the char pointer and length to log_->LogSymbol and
//    returns kUnknownExpression.
//  - revised side: picks log_->LogAsciiSymbol or log_->LogUC16Symbol based on
//    scanner_->is_literal_ascii() and returns kUnknownIdentifier.
PreParser::Identifier PreParser::GetIdentifierSymbol() { PreParser::Identifier PreParser::GetIdentifierSymbol() {
const char* literal_chars = scanner_->literal_string();
int literal_length = scanner_->literal_length();
int identifier_pos = scanner_->location().beg_pos; int identifier_pos = scanner_->location().beg_pos;
if (scanner_->is_literal_ascii()) {
log_->LogSymbol(identifier_pos, literal_chars, literal_length); log_->LogAsciiSymbol(identifier_pos, scanner_->literal_ascii_string());
} else {
return kUnknownExpression; log_->LogUC16Symbol(identifier_pos, scanner_->literal_uc16_string());
}
return kUnknownIdentifier;
} }
// Logs the string literal currently held by the scanner as a symbol, keyed by
// the token's start position, and returns kUnknownExpression on both sides of
// the change.
// Each line below carries both sides:
//  - earlier side: passes the char pointer and length to log_->LogSymbol.
//  - revised side: picks log_->LogAsciiSymbol or log_->LogUC16Symbol based on
//    scanner_->is_literal_ascii().
PreParser::Expression PreParser::GetStringSymbol() { PreParser::Expression PreParser::GetStringSymbol() {
const char* literal_chars = scanner_->literal_string(); int identifier_pos = scanner_->location().beg_pos;
int literal_length = scanner_->literal_length(); if (scanner_->is_literal_ascii()) {
log_->LogAsciiSymbol(identifier_pos, scanner_->literal_ascii_string());
int literal_position = scanner_->location().beg_pos; } else {
log_->LogSymbol(literal_position, literal_chars, literal_length); log_->LogUC16Symbol(identifier_pos, scanner_->literal_uc16_string());
}
return kUnknownExpression; return kUnknownExpression;
} }
...@@ -1154,7 +1154,8 @@ PreParser::Identifier PreParser::ParseIdentifierName(bool* ok) { ...@@ -1154,7 +1154,8 @@ PreParser::Identifier PreParser::ParseIdentifierName(bool* ok) {
if (i::Token::IsKeyword(next)) { if (i::Token::IsKeyword(next)) {
int pos = scanner_->location().beg_pos; int pos = scanner_->location().beg_pos;
const char* keyword = i::Token::String(next); const char* keyword = i::Token::String(next);
log_->LogSymbol(pos, keyword, i::StrLength(keyword)); log_->LogAsciiSymbol(pos, i::Vector<const char>(keyword,
i::StrLength(keyword)));
return kUnknownExpression; return kUnknownExpression;
} }
if (next == i::Token::IDENTIFIER) { if (next == i::Token::IDENTIFIER) {
...@@ -1173,8 +1174,8 @@ PreParser::Identifier PreParser::ParseIdentifierOrGetOrSet(bool* is_get, ...@@ -1173,8 +1174,8 @@ PreParser::Identifier PreParser::ParseIdentifierOrGetOrSet(bool* is_get,
bool* is_set, bool* is_set,
bool* ok) { bool* ok) {
Expect(i::Token::IDENTIFIER, CHECK_OK); Expect(i::Token::IDENTIFIER, CHECK_OK);
if (scanner_->literal_length() == 3) { if (scanner_->is_literal_ascii() && scanner_->literal_length() == 3) {
const char* token = scanner_->literal_string(); const char* token = scanner_->literal_ascii_string().start();
*is_get = strncmp(token, "get", 3) == 0; *is_get = strncmp(token, "get", 3) == 0;
*is_set = !*is_get && strncmp(token, "set", 3) == 0; *is_set = !*is_get && strncmp(token, "set", 3) == 0;
} }
......
...@@ -34,28 +34,6 @@ ...@@ -34,28 +34,6 @@
namespace v8 { namespace v8 {
namespace internal { namespace internal {
// ----------------------------------------------------------------------------
// LiteralCollector
// Constructs the collector with buffer_ sized from kInitialCapacity and with
// recording_ off.
LiteralCollector::LiteralCollector()
: buffer_(kInitialCapacity), recording_(false) { }
// Nothing to release explicitly; the destructor body is empty.
LiteralCollector::~LiteralCollector() {}
// Appends the UTF-8 encoding of |c| to buffer_. Only for characters above
// unibrow::Utf8::kMaxOneByteChar (asserted below).
void LiteralCollector::AddCharSlow(uc32 c) {
  ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);
  // Request a block sized to the encoded length, passing '\0' as the initial
  // value, then encode straight into it.
  const int encoded_size = unibrow::Utf8::Length(c);
  Vector<char> target = buffer_.AddBlock(encoded_size, '\0');
  const int bytes_written = unibrow::Utf8::Encode(target.start(), c);
#ifdef DEBUG
  // Debug builds verify that Encode produced exactly the predicted size.
  CHECK_EQ(encoded_size, bytes_written);
#else
  static_cast<void>(bytes_written);
#endif
}
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Character predicates // Character predicates
...@@ -256,7 +234,7 @@ Token::Value JavaScriptScanner::ScanHtmlComment() { ...@@ -256,7 +234,7 @@ Token::Value JavaScriptScanner::ScanHtmlComment() {
void JavaScriptScanner::Scan() { void JavaScriptScanner::Scan() {
next_.literal_chars = Vector<const char>(); next_.literal_chars = NULL;
Token::Value token; Token::Value token;
do { do {
// Remember the position of the next token // Remember the position of the next token
...@@ -561,7 +539,7 @@ Token::Value JavaScriptScanner::ScanString() { ...@@ -561,7 +539,7 @@ Token::Value JavaScriptScanner::ScanString() {
uc32 quote = c0_; uc32 quote = c0_;
Advance(); // consume quote Advance(); // consume quote
LiteralScope literal(this, kLiteralString); LiteralScope literal(this);
while (c0_ != quote && c0_ >= 0 while (c0_ != quote && c0_ >= 0
&& !ScannerConstants::kIsLineTerminator.get(c0_)) { && !ScannerConstants::kIsLineTerminator.get(c0_)) {
uc32 c = c0_; uc32 c = c0_;
...@@ -592,7 +570,7 @@ Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { ...@@ -592,7 +570,7 @@ Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
LiteralScope literal(this, kLiteralNumber); LiteralScope literal(this);
if (seen_period) { if (seen_period) {
// we have already seen a decimal point of the float // we have already seen a decimal point of the float
AddLiteralChar('.'); AddLiteralChar('.');
...@@ -681,7 +659,7 @@ uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { ...@@ -681,7 +659,7 @@ uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
LiteralScope literal(this, kLiteralIdentifier); LiteralScope literal(this);
KeywordMatcher keyword_match; KeywordMatcher keyword_match;
// Scan identifier start character. // Scan identifier start character.
if (c0_ == '\\') { if (c0_ == '\\') {
...@@ -747,7 +725,7 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { ...@@ -747,7 +725,7 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
// the scanner should pass uninterpreted bodies to the RegExp // the scanner should pass uninterpreted bodies to the RegExp
// constructor. // constructor.
LiteralScope literal(this, kLiteralRegExp); LiteralScope literal(this);
if (seen_equal) if (seen_equal)
AddLiteralChar('='); AddLiteralChar('=');
...@@ -773,7 +751,7 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { ...@@ -773,7 +751,7 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
bool JavaScriptScanner::ScanRegExpFlags() { bool JavaScriptScanner::ScanRegExpFlags() {
// Scan regular expression flags. // Scan regular expression flags.
LiteralScope literal(this, kLiteralRegExpFlags); LiteralScope literal(this);
while (ScannerConstants::kIsIdentifierPart.get(c0_)) { while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') { if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape(); uc32 c = ScanIdentifierUnicodeEscape();
......
This diff is collapsed.
...@@ -324,10 +324,8 @@ void Scanner::LiteralScope::Complete() { ...@@ -324,10 +324,8 @@ void Scanner::LiteralScope::Complete() {
V8JavaScriptScanner::V8JavaScriptScanner() : JavaScriptScanner() { } V8JavaScriptScanner::V8JavaScriptScanner() : JavaScriptScanner() { }
void V8JavaScriptScanner::Initialize(UC16CharacterStream* source, void V8JavaScriptScanner::Initialize(UC16CharacterStream* source) {
int literal_flags) {
source_ = source; source_ = source;
literal_flags_ = literal_flags | kLiteralIdentifier;
// Need to capture identifiers in order to recognize "get" and "set" // Need to capture identifiers in order to recognize "get" and "set"
// in object literals. // in object literals.
Init(); Init();
...@@ -377,7 +375,7 @@ bool JsonScanner::SkipJsonWhiteSpace() { ...@@ -377,7 +375,7 @@ bool JsonScanner::SkipJsonWhiteSpace() {
void JsonScanner::ScanJson() { void JsonScanner::ScanJson() {
next_.literal_chars = Vector<const char>(); next_.literal_chars = NULL;
Token::Value token; Token::Value token;
do { do {
// Remember the position of the next token // Remember the position of the next token
......
...@@ -134,8 +134,7 @@ class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { ...@@ -134,8 +134,7 @@ class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream {
// JavaScriptScanner subclass that is default-constructed and then attached to
// a UC16CharacterStream through Initialize().
// Each line below carries both sides of the change: the earlier Initialize
// also took an int literal_flags defaulting to kAllLiterals; the revised one
// takes only the stream.
class V8JavaScriptScanner : public JavaScriptScanner { class V8JavaScriptScanner : public JavaScriptScanner {
public: public:
V8JavaScriptScanner(); V8JavaScriptScanner();
void Initialize(UC16CharacterStream* source, void Initialize(UC16CharacterStream* source);
int literal_flags = kAllLiterals);
}; };
......
...@@ -530,6 +530,24 @@ class Collector { ...@@ -530,6 +530,24 @@ class Collector {
} }
// Copies every element of |source| into the collector as one contiguous run
// and returns a vector viewing the stored copy.
// For a basic Collector the returned vector remains valid for as long as the
// Collector itself is alive.
inline Vector<T> AddBlock(Vector<const T> source) {
  const int count = source.length();
  // Make room first if the current chunk cannot hold the whole run.
  const int available = current_chunk_.length() - index_;
  if (count > available) {
    Grow(count);
  }
  T* destination = current_chunk_.start() + index_;
  for (int i = 0; i < count; ++i) {
    destination[i] = source[i];
  }
  index_ += count;
  size_ += count;
  return Vector<T>(destination, count);
}
// Write the contents of the collector into the provided vector. // Write the contents of the collector into the provided vector.
void WriteTo(Vector<T> destination) { void WriteTo(Vector<T> destination) {
ASSERT(size_ <= destination.length()); ASSERT(size_ <= destination.length());
......
...@@ -573,7 +573,7 @@ void TestStreamScanner(i::UC16CharacterStream* stream, ...@@ -573,7 +573,7 @@ void TestStreamScanner(i::UC16CharacterStream* stream,
int skip_pos = 0, // Zero means not skipping. int skip_pos = 0, // Zero means not skipping.
int skip_to = 0) { int skip_to = 0) {
i::V8JavaScriptScanner scanner; i::V8JavaScriptScanner scanner;
scanner.Initialize(stream, i::JavaScriptScanner::kAllLiterals); scanner.Initialize(stream);
int i = 0; int i = 0;
do { do {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment