Commit 87e77085 authored by dcarney@chromium.org

Move most scanner buffer accesses into scanner.

R=marja@chromium.org

BUG=

Review URL: https://codereview.chromium.org/197103002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19849 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 8735adb2
......@@ -212,13 +212,7 @@ Handle<String> Parser::LookupSymbol(int symbol_id) {
// count.
if (symbol_id < 0 ||
(pre_parse_data_ && symbol_id >= pre_parse_data_->symbol_count())) {
if (scanner()->is_literal_ascii()) {
return isolate()->factory()->InternalizeOneByteString(
Vector<const uint8_t>::cast(scanner()->literal_ascii_string()));
} else {
return isolate()->factory()->InternalizeTwoByteString(
scanner()->literal_utf16_string());
}
return scanner()->AllocateInternalizedString(isolate_);
}
return LookupCachedSymbol(symbol_id);
}
......@@ -233,13 +227,7 @@ Handle<String> Parser::LookupCachedSymbol(int symbol_id) {
}
Handle<String> result = symbol_cache_.at(symbol_id);
if (result.is_null()) {
if (scanner()->is_literal_ascii()) {
result = isolate()->factory()->InternalizeOneByteString(
Vector<const uint8_t>::cast(scanner()->literal_ascii_string()));
} else {
result = isolate()->factory()->InternalizeTwoByteString(
scanner()->literal_utf16_string());
}
result = scanner()->AllocateInternalizedString(isolate_);
symbol_cache_.at(symbol_id) = result;
return result;
}
......@@ -514,13 +502,7 @@ Handle<String> ParserTraits::GetSymbol(Scanner* scanner) {
Handle<String> ParserTraits::NextLiteralString(Scanner* scanner,
PretenureFlag tenured) {
if (scanner->is_next_literal_ascii()) {
return parser_->isolate_->factory()->NewStringFromAscii(
scanner->next_literal_ascii_string(), tenured);
} else {
return parser_->isolate_->factory()->NewStringFromTwoByte(
scanner->next_literal_utf16_string(), tenured);
}
return scanner->AllocateNextLiteralString(parser_->isolate(), tenured);
}
......@@ -544,11 +526,7 @@ Literal* ParserTraits::ExpressionFromLiteral(
case Token::FALSE_LITERAL:
return factory->NewLiteral(isolate_factory->false_value(), pos);
case Token::NUMBER: {
ASSERT(scanner->is_literal_ascii());
double value = StringToDouble(parser_->isolate()->unicode_cache(),
scanner->literal_ascii_string(),
ALLOW_HEX | ALLOW_OCTAL |
ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
double value = scanner->DoubleValue();
return factory->NewNumberLiteral(value, pos);
}
default:
......
......@@ -704,16 +704,6 @@ class Parser : public ParserBase<ParserTraits> {
bool CheckInOrOf(bool accept_OF, ForEachStatement::VisitMode* visit_mode);
Handle<String> LiteralString(PretenureFlag tenured) {
if (scanner()->is_literal_ascii()) {
return isolate_->factory()->NewStringFromAscii(
scanner()->literal_ascii_string(), tenured);
} else {
return isolate_->factory()->NewStringFromTwoByte(
scanner()->literal_utf16_string(), tenured);
}
}
// Get odd-ball literals.
Literal* GetLiteralUndefined(int position);
......
......@@ -91,16 +91,11 @@ PreParserIdentifier PreParserTraits::GetSymbol(Scanner* scanner) {
} else if (scanner->current_token() == Token::YIELD) {
return PreParserIdentifier::Yield();
}
if (scanner->is_literal_ascii()) {
// Detect strict-mode poison words.
if (scanner->literal_length() == 4 &&
!strncmp(scanner->literal_ascii_string().start(), "eval", 4)) {
return PreParserIdentifier::Eval();
}
if (scanner->literal_length() == 9 &&
!strncmp(scanner->literal_ascii_string().start(), "arguments", 9)) {
return PreParserIdentifier::Arguments();
}
if (scanner->UnescapedLiteralMatches("eval", 4)) {
return PreParserIdentifier::Eval();
}
if (scanner->UnescapedLiteralMatches("arguments", 9)) {
return PreParserIdentifier::Arguments();
}
return PreParserIdentifier::Default();
}
......@@ -108,14 +103,8 @@ PreParserIdentifier PreParserTraits::GetSymbol(Scanner* scanner) {
PreParserExpression PreParserTraits::ExpressionFromString(
int pos, Scanner* scanner, PreParserFactory* factory) {
const int kUseStrictLength = 10;
const char* kUseStrictChars = "use strict";
pre_parser_->LogSymbol();
if (scanner->is_literal_ascii() &&
scanner->literal_length() == kUseStrictLength &&
!scanner->literal_contains_escapes() &&
!strncmp(scanner->literal_ascii_string().start(), kUseStrictChars,
kUseStrictLength)) {
if (scanner->UnescapedLiteralMatches("use strict", 10)) {
return PreParserExpression::UseStrictStringLiteral();
}
return PreParserExpression::StringLiteral();
......@@ -1176,9 +1165,9 @@ PreParser::Expression PreParser::ParseFunctionLiteral(
}
int prev_value;
if (scanner()->is_literal_ascii()) {
prev_value =
duplicate_finder.AddAsciiSymbol(scanner()->literal_ascii_string(), 1);
if (scanner()->is_literal_one_byte()) {
prev_value = duplicate_finder.AddAsciiSymbol(
scanner()->literal_one_byte_string(), 1);
} else {
prev_value =
duplicate_finder.AddUtf16Symbol(scanner()->literal_utf16_string(), 1);
......@@ -1285,8 +1274,8 @@ PreParser::Expression PreParser::ParseV8Intrinsic(bool* ok) {
void PreParser::LogSymbol() {
int identifier_pos = position();
if (scanner()->is_literal_ascii()) {
log_->LogAsciiSymbol(identifier_pos, scanner()->literal_ascii_string());
if (scanner()->is_literal_one_byte()) {
log_->LogAsciiSymbol(identifier_pos, scanner()->literal_one_byte_string());
} else {
log_->LogUtf16Symbol(identifier_pos, scanner()->literal_utf16_string());
}
......
......@@ -1114,12 +1114,7 @@ ParserBase<Traits>::ParseIdentifierNameOrGetOrSet(bool* is_get,
bool* ok) {
typename Traits::Type::Identifier result = ParseIdentifierName(ok);
if (!*ok) return Traits::EmptyIdentifier();
if (scanner()->is_literal_ascii() &&
scanner()->literal_length() == 3) {
const char* token = scanner()->literal_ascii_string().start();
*is_get = strncmp(token, "get", 3) == 0;
*is_set = !*is_get && strncmp(token, "set", 3) == 0;
}
scanner()->IsGetOrSet(is_get, is_set);
return result;
}
......@@ -1517,9 +1512,9 @@ void ParserBase<Traits>::ObjectLiteralChecker::CheckProperty(
bool* ok) {
int old;
if (property == Token::NUMBER) {
old = finder_.AddNumber(scanner()->literal_ascii_string(), type);
} else if (scanner()->is_literal_ascii()) {
old = finder_.AddAsciiSymbol(scanner()->literal_ascii_string(), type);
old = finder_.AddNumber(scanner()->literal_one_byte_string(), type);
} else if (scanner()->is_literal_one_byte()) {
old = finder_.AddAsciiSymbol(scanner()->literal_one_byte_string(), type);
} else {
old = finder_.AddUtf16Symbol(scanner()->literal_utf16_string(), type);
}
......
......@@ -35,6 +35,7 @@
#include "char-predicates-inl.h"
#include "conversions-inl.h"
#include "list-inl.h"
#include "v8.h"
namespace v8 {
namespace internal {
......@@ -982,8 +983,8 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {
literal.Complete();
if (next_.literal_chars->is_ascii()) {
Vector<const char> chars = next_.literal_chars->ascii_literal();
if (next_.literal_chars->is_one_byte()) {
Vector<const char> chars = next_.literal_chars->one_byte_literal();
return KeywordOrIdentifierToken(chars.start(),
chars.length(),
harmony_scoping_,
......@@ -1114,6 +1115,49 @@ bool Scanner::ScanRegExpFlags() {
}
// Builds a heap string from the current literal. A one-byte literal is
// handed to NewStringFromOneByte (after reinterpreting its chars as
// uint8_t); anything else goes through NewStringFromTwoByte. |tenured| is
// forwarded unchanged to the factory.
Handle<String> Scanner::AllocateLiteralString(Isolate* isolate,
                                              PretenureFlag tenured) {
  if (!is_literal_one_byte()) {
    return isolate->factory()->NewStringFromTwoByte(
        literal_utf16_string(), tenured);
  }
  Vector<const char> one_byte_chars = literal_one_byte_string();
  return isolate->factory()->NewStringFromOneByte(
      Vector<const uint8_t>::cast(one_byte_chars), tenured);
}
// Same as AllocateLiteralString, but reads the literal of the next token
// rather than the current one. One-byte literals use NewStringFromOneByte,
// all others NewStringFromTwoByte; |tenured| is passed straight through.
Handle<String> Scanner::AllocateNextLiteralString(Isolate* isolate,
                                                  PretenureFlag tenured) {
  if (!is_next_literal_one_byte()) {
    return isolate->factory()->NewStringFromTwoByte(
        next_literal_utf16_string(), tenured);
  }
  Vector<const char> one_byte_chars = next_literal_one_byte_string();
  return isolate->factory()->NewStringFromOneByte(
      Vector<const uint8_t>::cast(one_byte_chars), tenured);
}
// Returns the internalized string for the current literal, choosing the
// factory's one-byte or two-byte internalization entry point according to
// how the literal is stored.
Handle<String> Scanner::AllocateInternalizedString(Isolate* isolate) {
  if (is_literal_one_byte()) {
    Vector<const char> one_byte_chars = literal_one_byte_string();
    return isolate->factory()->InternalizeOneByteString(
        Vector<const uint8_t>::cast(one_byte_chars));
  }
  return isolate->factory()->InternalizeTwoByteString(
      literal_utf16_string());
}
double Scanner::DoubleValue() {
ASSERT(is_literal_one_byte());
return StringToDouble(
unicode_cache_, literal_one_byte_string(),
ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
}
// Registers a char-based symbol: the key is reinterpreted as raw bytes and
// forwarded to AddSymbol with the one-byte flag set. Returns whatever
// AddSymbol returns.
int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) {
  Vector<const byte> raw_key = Vector<const byte>::cast(key);
  return AddSymbol(raw_key, true, value);
}
......@@ -1125,10 +1169,10 @@ int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) {
int DuplicateFinder::AddSymbol(Vector<const byte> key,
bool is_ascii,
bool is_one_byte,
int value) {
uint32_t hash = Hash(key, is_ascii);
byte* encoding = BackupKey(key, is_ascii);
uint32_t hash = Hash(key, is_one_byte);
byte* encoding = BackupKey(key, is_one_byte);
HashMap::Entry* entry = map_.Lookup(encoding, hash, true);
int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
entry->value =
......@@ -1189,11 +1233,11 @@ bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) {
}
uint32_t DuplicateFinder::Hash(Vector<const byte> key, bool is_ascii) {
uint32_t DuplicateFinder::Hash(Vector<const byte> key, bool is_one_byte) {
// Primitive hash function, almost identical to the one used
// for strings (except that it's seeded by the length and ASCII-ness).
int length = key.length();
uint32_t hash = (length << 1) | (is_ascii ? 1 : 0) ;
uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0) ;
for (int i = 0; i < length; i++) {
uint32_t c = key[i];
hash = (hash + c) * 1025;
......@@ -1211,39 +1255,39 @@ bool DuplicateFinder::Match(void* first, void* second) {
// was ASCII.
byte* s1 = reinterpret_cast<byte*>(first);
byte* s2 = reinterpret_cast<byte*>(second);
uint32_t length_ascii_field = 0;
uint32_t length_one_byte_field = 0;
byte c1;
do {
c1 = *s1;
if (c1 != *s2) return false;
length_ascii_field = (length_ascii_field << 7) | (c1 & 0x7f);
length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
s1++;
s2++;
} while ((c1 & 0x80) != 0);
int length = static_cast<int>(length_ascii_field >> 1);
int length = static_cast<int>(length_one_byte_field >> 1);
return memcmp(s1, s2, length) == 0;
}
byte* DuplicateFinder::BackupKey(Vector<const byte> bytes,
bool is_ascii) {
uint32_t ascii_length = (bytes.length() << 1) | (is_ascii ? 1 : 0);
bool is_one_byte) {
uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
backing_store_.StartSequence();
// Emit ascii_length as base-128 encoded number, with the 7th bit set
// Emit one_byte_length as base-128 encoded number, with the 7th bit set
// on the byte of every heptet except the last, least significant, one.
if (ascii_length >= (1 << 7)) {
if (ascii_length >= (1 << 14)) {
if (ascii_length >= (1 << 21)) {
if (ascii_length >= (1 << 28)) {
backing_store_.Add(static_cast<byte>((ascii_length >> 28) | 0x80));
if (one_byte_length >= (1 << 7)) {
if (one_byte_length >= (1 << 14)) {
if (one_byte_length >= (1 << 21)) {
if (one_byte_length >= (1 << 28)) {
backing_store_.Add(static_cast<byte>((one_byte_length >> 28) | 0x80));
}
backing_store_.Add(static_cast<byte>((ascii_length >> 21) | 0x80u));
backing_store_.Add(static_cast<byte>((one_byte_length >> 21) | 0x80u));
}
backing_store_.Add(static_cast<byte>((ascii_length >> 14) | 0x80u));
backing_store_.Add(static_cast<byte>((one_byte_length >> 14) | 0x80u));
}
backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u));
backing_store_.Add(static_cast<byte>((one_byte_length >> 7) | 0x80u));
}
backing_store_.Add(static_cast<byte>(ascii_length & 0x7f));
backing_store_.Add(static_cast<byte>(one_byte_length & 0x7f));
backing_store_.AddBlock(bytes);
return backing_store_.EndSequence().start();
......
......@@ -176,19 +176,19 @@ class DuplicateFinder {
int AddNumber(Vector<const char> key, int value);
private:
int AddSymbol(Vector<const byte> key, bool is_ascii, int value);
int AddSymbol(Vector<const byte> key, bool is_one_byte, int value);
// Backs up the key and its length in the backing store.
// The backup is stored with a base-128 encoding of the
// length (plus a bit saying whether the string is one-byte),
// followed by the bytes of the key.
byte* BackupKey(Vector<const byte> key, bool is_ascii);
byte* BackupKey(Vector<const byte> key, bool is_one_byte);
// Compare two encoded keys (both pointing into the backing store)
// for having the same base-128 encoded lengths and one-byte-ness,
// and then having the same 'length' bytes following.
static bool Match(void* first, void* second);
// Creates a hash from a sequence of bytes.
static uint32_t Hash(Vector<const byte> key, bool is_ascii);
static uint32_t Hash(Vector<const byte> key, bool is_one_byte);
// Checks whether a string containing a JS number is its canonical
// form.
static bool IsNumberCanonical(Vector<const char> key);
......@@ -211,7 +211,7 @@ class DuplicateFinder {
class LiteralBuffer {
public:
LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }
LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
~LiteralBuffer() {
if (backing_store_.length() > 0) {
......@@ -221,7 +221,7 @@ class LiteralBuffer {
INLINE(void AddChar(uint32_t code_unit)) {
if (position_ >= backing_store_.length()) ExpandBuffer();
if (is_ascii_) {
if (is_one_byte_) {
if (code_unit <= unibrow::Latin1::kMaxChar) {
backing_store_[position_] = static_cast<byte>(code_unit);
position_ += kOneByteSize;
......@@ -234,35 +234,35 @@ class LiteralBuffer {
position_ += kUC16Size;
}
bool is_ascii() { return is_ascii_; }
bool is_one_byte() { return is_one_byte_; }
bool is_contextual_keyword(Vector<const char> keyword) {
return is_ascii() && keyword.length() == position_ &&
return is_one_byte() && keyword.length() == position_ &&
(memcmp(keyword.start(), backing_store_.start(), position_) == 0);
}
Vector<const uc16> utf16_literal() {
ASSERT(!is_ascii_);
ASSERT(!is_one_byte_);
ASSERT((position_ & 0x1) == 0);
return Vector<const uc16>(
reinterpret_cast<const uc16*>(backing_store_.start()),
position_ >> 1);
}
Vector<const char> ascii_literal() {
ASSERT(is_ascii_);
Vector<const char> one_byte_literal() {
ASSERT(is_one_byte_);
return Vector<const char>(
reinterpret_cast<const char*>(backing_store_.start()),
position_);
}
int length() {
return is_ascii_ ? position_ : (position_ >> 1);
return is_one_byte_ ? position_ : (position_ >> 1);
}
void Reset() {
position_ = 0;
is_ascii_ = true;
is_one_byte_ = true;
}
private:
......@@ -284,7 +284,7 @@ class LiteralBuffer {
}
void ConvertToUtf16() {
ASSERT(is_ascii_);
ASSERT(is_one_byte_);
Vector<byte> new_store;
int new_content_size = position_ * kUC16Size;
if (new_content_size >= backing_store_.length()) {
......@@ -304,10 +304,10 @@ class LiteralBuffer {
backing_store_ = new_store;
}
position_ = new_content_size;
is_ascii_ = false;
is_one_byte_ = false;
}
bool is_ascii_;
bool is_one_byte_;
int position_;
Vector<byte> backing_store_;
......@@ -376,17 +376,17 @@ class Scanner {
// numbers.
// These functions only give the correct result if the literal
// was scanned between calls to StartLiteral() and TerminateLiteral().
Vector<const char> literal_ascii_string() {
Vector<const char> literal_one_byte_string() {
ASSERT_NOT_NULL(current_.literal_chars);
return current_.literal_chars->ascii_literal();
return current_.literal_chars->one_byte_literal();
}
Vector<const uc16> literal_utf16_string() {
ASSERT_NOT_NULL(current_.literal_chars);
return current_.literal_chars->utf16_literal();
}
bool is_literal_ascii() {
bool is_literal_one_byte() {
ASSERT_NOT_NULL(current_.literal_chars);
return current_.literal_chars->is_ascii();
return current_.literal_chars->is_one_byte();
}
bool is_literal_contextual_keyword(Vector<const char> keyword) {
ASSERT_NOT_NULL(current_.literal_chars);
......@@ -416,17 +416,17 @@ class Scanner {
// Returns the literal string for the next token (the token that
// would be returned if Next() were called).
Vector<const char> next_literal_ascii_string() {
Vector<const char> next_literal_one_byte_string() {
ASSERT_NOT_NULL(next_.literal_chars);
return next_.literal_chars->ascii_literal();
return next_.literal_chars->one_byte_literal();
}
Vector<const uc16> next_literal_utf16_string() {
ASSERT_NOT_NULL(next_.literal_chars);
return next_.literal_chars->utf16_literal();
}
bool is_next_literal_ascii() {
bool is_next_literal_one_byte() {
ASSERT_NOT_NULL(next_.literal_chars);
return next_.literal_chars->is_ascii();
return next_.literal_chars->is_one_byte();
}
bool is_next_contextual_keyword(Vector<const char> keyword) {
ASSERT_NOT_NULL(next_.literal_chars);
......@@ -437,6 +437,30 @@ class Scanner {
return next_.literal_chars->length();
}
Handle<String> AllocateLiteralString(Isolate* isolate, PretenureFlag tenured);
Handle<String> AllocateNextLiteralString(Isolate* isolate,
PretenureFlag tenured);
Handle<String> AllocateInternalizedString(Isolate* isolate);
double DoubleValue();
// Returns true only when the current literal is stored as one-byte, has
// exactly |length| characters, contains no escapes, and its first |length|
// characters compare equal to |data| under strncmp. Any other case yields
// false without inspecting the characters.
bool UnescapedLiteralMatches(const char* data, int length) {
  const bool comparable = is_literal_one_byte() &&
                          literal_length() == length &&
                          !literal_contains_escapes();
  if (!comparable) return false;
  return strncmp(literal_one_byte_string().start(), data, length) == 0;
}
// Reports whether the current literal spells "get" or "set".
// The flags are written only when the literal is one-byte, three characters
// long and free of escapes; in every other case *is_get and *is_set are
// left exactly as the caller passed them in. At most one flag ends up true.
void IsGetOrSet(bool* is_get, bool* is_set) {
  if (!is_literal_one_byte()) return;
  if (literal_length() != 3) return;
  if (literal_contains_escapes()) return;

  const char* chars = literal_one_byte_string().start();
  const bool matches_get = strncmp(chars, "get", 3) == 0;
  *is_get = matches_get;
  *is_set = !matches_get && strncmp(chars, "set", 3) == 0;
}
UnicodeCache* unicode_cache() { return unicode_cache_; }
static const int kCharacterLookaheadBufferSize = 1;
......
......@@ -802,8 +802,8 @@ void TestScanRegExp(const char* re_source, const char* expected) {
CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
scanner.Next(); // Current token is now the regexp literal.
CHECK(scanner.is_literal_ascii());
i::Vector<const char> actual = scanner.literal_ascii_string();
CHECK(scanner.is_literal_one_byte());
i::Vector<const char> actual = scanner.literal_one_byte_string();
for (int i = 0; i < actual.length(); i++) {
CHECK_NE('\0', expected[i]);
CHECK_EQ(expected[i], actual[i]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment