Commit b0c4a876 authored by Toon Verwaest, committed by Commit Bot

[json] Speed up json parsing

- scan using raw data pointers + GC callback
- scan using scanner tables
- cap internalizing large string values
- inline fast transitioning logic

Fixes previous CL by moving AllowHeapAllocation to callers of
ReportUnexpectedCharacter where needed to make it clear we need to exit.

Tbr: ulan@chromium.org
Change-Id: Icfbb7cd536e0fbe153f34acca5d0fab6b5453d71
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1591778
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61159}
parent 3ce92ce8
......@@ -2571,6 +2571,8 @@ v8_source_set("v8_base_without_compiler") {
"src/parsing/expression-scope.h",
"src/parsing/func-name-inferrer.cc",
"src/parsing/func-name-inferrer.h",
"src/parsing/literal-buffer.cc",
"src/parsing/literal-buffer.h",
"src/parsing/parse-info.cc",
"src/parsing/parse-info.h",
"src/parsing/parser-base.h",
......
......@@ -54,37 +54,16 @@ class OneByteStringStream {
} // namespace
// String table key wrapping an AstRawString. The key's hash field is taken
// from the raw string, and both matching and materialization dispatch on
// whether the raw string's literal bytes are one-byte or two-byte.
class AstRawStringInternalizationKey : public StringTableKey {
 public:
  explicit AstRawStringInternalizationKey(const AstRawString* string)
      : StringTableKey(string->hash_field()), string_(string) {}

  // Returns whether |other| (cast to String) equals the wrapped literal.
  bool IsMatch(Object other) override {
    if (!string_->is_one_byte()) {
      return String::cast(other)->IsTwoByteEqualTo(
          Vector<const uint16_t>::cast(string_->literal_bytes_));
    }
    return String::cast(other)->IsOneByteEqualTo(string_->literal_bytes_);
  }

  // Creates a new internalized string from the wrapped literal, reusing the
  // raw string's hash field.
  Handle<String> AsHandle(Isolate* isolate) override {
    if (!string_->is_one_byte()) {
      return isolate->factory()->NewTwoByteInternalizedString(
          Vector<const uint16_t>::cast(string_->literal_bytes_),
          string_->hash_field());
    }
    return isolate->factory()->NewOneByteInternalizedString(
        string_->literal_bytes_, string_->hash_field());
  }

 private:
  // The wrapped raw string; held by pointer, not copied.
  const AstRawString* string_;
};
void AstRawString::Internalize(Isolate* isolate) {
DCHECK(!has_string_);
if (literal_bytes_.length() == 0) {
set_string(isolate->factory()->empty_string());
} else if (is_one_byte()) {
OneByteStringKey key(hash_field_, literal_bytes_);
set_string(StringTable::LookupKey(isolate, &key));
} else {
AstRawStringInternalizationKey key(this);
TwoByteStringKey key(hash_field_,
Vector<const uint16_t>::cast(literal_bytes_));
set_string(StringTable::LookupKey(isolate, &key));
}
}
......
......@@ -685,7 +685,8 @@ Handle<AccessorPair> Factory::NewAccessorPair() {
}
// Internalized strings are created in the old generation (data space).
Handle<String> Factory::InternalizeUtf8String(Vector<const char> string) {
Handle<String> Factory::InternalizeUtf8String(
const Vector<const char>& string) {
Vector<const uint8_t> utf8_data = Vector<const uint8_t>::cast(string);
Utf8Decoder decoder(utf8_data);
if (decoder.is_ascii()) return InternalizeOneByteString(utf8_data);
......@@ -701,7 +702,8 @@ Handle<String> Factory::InternalizeUtf8String(Vector<const char> string) {
Vector<const uc16>(buffer.get(), decoder.utf16_length()));
}
Handle<String> Factory::InternalizeOneByteString(Vector<const uint8_t> string) {
Handle<String> Factory::InternalizeOneByteString(
const Vector<const uint8_t>& string) {
OneByteStringKey key(string, HashSeed(isolate()));
return InternalizeStringWithKey(&key);
}
......@@ -712,7 +714,8 @@ Handle<String> Factory::InternalizeOneByteString(
return InternalizeStringWithKey(&key);
}
Handle<String> Factory::InternalizeTwoByteString(Vector<const uc16> string) {
Handle<String> Factory::InternalizeTwoByteString(
const Vector<const uc16>& string) {
TwoByteStringKey key(string, HashSeed(isolate()));
return InternalizeStringWithKey(&key);
}
......@@ -722,8 +725,8 @@ Handle<String> Factory::InternalizeStringWithKey(StringTableKey* key) {
return StringTable::LookupKey(isolate(), key);
}
MaybeHandle<String> Factory::NewStringFromOneByte(Vector<const uint8_t> string,
AllocationType allocation) {
MaybeHandle<String> Factory::NewStringFromOneByte(
const Vector<const uint8_t>& string, AllocationType allocation) {
DCHECK_NE(allocation, AllocationType::kReadOnly);
int length = string.length();
if (length == 0) return empty_string();
......@@ -740,9 +743,9 @@ MaybeHandle<String> Factory::NewStringFromOneByte(Vector<const uint8_t> string,
return result;
}
MaybeHandle<String> Factory::NewStringFromUtf8(Vector<const char> data,
MaybeHandle<String> Factory::NewStringFromUtf8(const Vector<const char>& string,
AllocationType allocation) {
Vector<const uint8_t> utf8_data = Vector<const uint8_t>::cast(data);
Vector<const uint8_t> utf8_data = Vector<const uint8_t>::cast(string);
Utf8Decoder decoder(utf8_data);
if (decoder.utf16_length() == 0) return empty_string();
......@@ -846,8 +849,8 @@ MaybeHandle<String> Factory::NewStringFromTwoByte(const uc16* string,
}
}
MaybeHandle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string,
AllocationType allocation) {
MaybeHandle<String> Factory::NewStringFromTwoByte(
const Vector<const uc16>& string, AllocationType allocation) {
return NewStringFromTwoByte(string.begin(), string.length(), allocation);
}
......@@ -899,7 +902,7 @@ Handle<SeqOneByteString> Factory::AllocateRawOneByteInternalizedString(
}
Handle<String> Factory::AllocateTwoByteInternalizedString(
Vector<const uc16> str, uint32_t hash_field) {
const Vector<const uc16>& str, uint32_t hash_field) {
CHECK_GE(String::kMaxLength, str.length());
DCHECK_NE(0, str.length()); // Use Heap::empty_string() instead.
......@@ -958,8 +961,8 @@ Handle<String> Factory::AllocateInternalizedStringImpl(T t, int chars,
return answer;
}
Handle<String> Factory::NewOneByteInternalizedString(Vector<const uint8_t> str,
uint32_t hash_field) {
Handle<String> Factory::NewOneByteInternalizedString(
const Vector<const uint8_t>& str, uint32_t hash_field) {
Handle<SeqOneByteString> result =
AllocateRawOneByteInternalizedString(str.length(), hash_field);
DisallowHeapAllocation no_allocation;
......@@ -978,8 +981,8 @@ Handle<String> Factory::NewOneByteInternalizedSubString(
return result;
}
Handle<String> Factory::NewTwoByteInternalizedString(Vector<const uc16> str,
uint32_t hash_field) {
Handle<String> Factory::NewTwoByteInternalizedString(
const Vector<const uc16>& str, uint32_t hash_field) {
return AllocateTwoByteInternalizedString(str, hash_field);
}
......
......@@ -236,16 +236,16 @@ class V8_EXPORT_PRIVATE Factory {
// Finds the internalized copy for string in the string table.
// If not found, a new string is added to the table and returned.
Handle<String> InternalizeUtf8String(Vector<const char> str);
Handle<String> InternalizeUtf8String(const Vector<const char>& str);
Handle<String> InternalizeUtf8String(const char* str) {
return InternalizeUtf8String(CStrVector(str));
}
Handle<String> InternalizeOneByteString(Vector<const uint8_t> str);
Handle<String> InternalizeOneByteString(const Vector<const uint8_t>& str);
Handle<String> InternalizeOneByteString(Handle<SeqOneByteString>, int from,
int length);
Handle<String> InternalizeTwoByteString(Vector<const uc16> str);
Handle<String> InternalizeTwoByteString(const Vector<const uc16>& str);
template <class StringTableKey>
Handle<String> InternalizeStringWithKey(StringTableKey* key);
......@@ -276,7 +276,7 @@ class V8_EXPORT_PRIVATE Factory {
//
// One-byte strings are pretenured when used as keys in the SourceCodeCache.
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromOneByte(
Vector<const uint8_t> str,
const Vector<const uint8_t>& str,
AllocationType allocation = AllocationType::kYoung);
template <size_t N>
......@@ -297,7 +297,7 @@ class V8_EXPORT_PRIVATE Factory {
// UTF8 strings are pretenured when used for regexp literal patterns and
// flags in the parser.
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromUtf8(
Vector<const char> str,
const Vector<const char>& str,
AllocationType allocation = AllocationType::kYoung);
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromUtf8SubString(
......@@ -305,7 +305,7 @@ class V8_EXPORT_PRIVATE Factory {
AllocationType allocation = AllocationType::kYoung);
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromTwoByte(
Vector<const uc16> str,
const Vector<const uc16>& str,
AllocationType allocation = AllocationType::kYoung);
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromTwoByte(
......@@ -314,14 +314,14 @@ class V8_EXPORT_PRIVATE Factory {
Handle<JSStringIterator> NewJSStringIterator(Handle<String> string);
Handle<String> NewOneByteInternalizedString(Vector<const uint8_t> str,
Handle<String> NewOneByteInternalizedString(const Vector<const uint8_t>& str,
uint32_t hash_field);
Handle<String> NewOneByteInternalizedSubString(
Handle<SeqOneByteString> string, int offset, int length,
uint32_t hash_field);
Handle<String> NewTwoByteInternalizedString(Vector<const uc16> str,
Handle<String> NewTwoByteInternalizedString(const Vector<const uc16>& str,
uint32_t hash_field);
Handle<String> NewInternalizedStringImpl(Handle<String> string, int chars,
......@@ -1050,8 +1050,8 @@ class V8_EXPORT_PRIVATE Factory {
Handle<SeqOneByteString> AllocateRawOneByteInternalizedString(
int length, uint32_t hash_field);
Handle<String> AllocateTwoByteInternalizedString(Vector<const uc16> str,
uint32_t hash_field);
Handle<String> AllocateTwoByteInternalizedString(
const Vector<const uc16>& str, uint32_t hash_field);
MaybeHandle<String> NewStringFromTwoByte(const uc16* string, int length,
AllocationType allocation);
......
......@@ -15,7 +15,7 @@
#include "src/objects/hash-table-inl.h"
#include "src/property-descriptor.h"
#include "src/string-hasher.h"
#include "src/transitions.h"
#include "src/transitions-inl.h"
namespace v8 {
namespace internal {
......@@ -49,6 +49,101 @@ class VectorSegment {
const typename Container::size_type begin_;
};
// Maps a single one-byte character to the JsonToken it introduces. Any
// character not listed below yields JsonToken::ILLEGAL. Kept as one return
// expression so the function stays usable in constant initializers.
constexpr JsonToken GetOneCharToken(uint8_t c) {
  // clang-format off
  return
      c == '"'                 ? JsonToken::STRING :
      IsDecimalDigit(c)        ? JsonToken::NUMBER :
      // Structural characters.
      c == '{'                 ? JsonToken::LBRACE :
      c == '}'                 ? JsonToken::RBRACE :
      c == '['                 ? JsonToken::LBRACK :
      c == ']'                 ? JsonToken::RBRACK :
      c == ':'                 ? JsonToken::COLON :
      c == ','                 ? JsonToken::COMMA :
      // First characters of a negative number and of the keyword literals.
      c == '-'                 ? JsonToken::NEGATIVE_NUMBER :
      c == 't'                 ? JsonToken::TRUE_LITERAL :
      c == 'f'                 ? JsonToken::FALSE_LITERAL :
      c == 'n'                 ? JsonToken::NULL_LITERAL :
      // The four whitespace characters.
      (c == ' '  || c == '\t' ||
       c == '\r' || c == '\n') ? JsonToken::WHITESPACE :
                                 JsonToken::ILLEGAL;
  // clang-format on
}
// Lookup table from one-byte character (0x00..0xFF only) to the token that
// GetOneCharToken() assigns to it, built at compile time.
static const constexpr JsonToken one_char_tokens[256] = {
// Lower half: 0x00..0x7F.
#define ONE_CHAR_TOKEN_ENTRY(N) GetOneCharToken(N),
    INT_0_TO_127_LIST(ONE_CHAR_TOKEN_ENTRY)
#undef ONE_CHAR_TOKEN_ENTRY
// Upper half: 0x80..0xFF.
#define ONE_CHAR_TOKEN_ENTRY(N) GetOneCharToken(128 + N),
    INT_0_TO_127_LIST(ONE_CHAR_TOKEN_ENTRY)
#undef ONE_CHAR_TOKEN_ENTRY
};
// Escape classification that GetScanFlags() assigns to each character and
// stores in the low bits of its scan flags. Presumably consulted for the
// character following a backslash inside a string -- TODO confirm at the use
// site. The enumerator order defines the encoded values; do not reorder.
enum class EscapeKind : uint8_t {
kIllegal,  // Any character not listed for the other kinds.
kSelf,  // '"', '\\' and '/'.
kBackspace,  // 'b'
kTab,  // 't'
kNewLine,  // 'n'
kFormFeed,  // 'f'
kCarriageReturn,  // 'r'
kUnicode  // 'u'
};
// Fields packed into the per-character scan-flags byte produced by
// GetScanFlags(): the EscapeKind first, then one boolean for "may terminate
// string" and one for "number part". Each field starts at the previous
// field's kNext.
using EscapeKindField = BitField8<EscapeKind, 0, 3>;
using MayTerminateStringField = BitField8<bool, EscapeKindField::kNext, 1>;
using NumberPartField = BitField8<bool, MayTerminateStringField::kNext, 1>;
// Decodes the "may terminate string" bit from a scan-flags byte.
constexpr bool MayTerminateString(uint8_t flags) {
return MayTerminateStringField::decode(flags);
}
// Decodes the EscapeKind from a scan-flags byte.
constexpr EscapeKind GetEscapeKind(uint8_t flags) {
return EscapeKindField::decode(flags);
}
// Decodes the "number part" bit from a scan-flags byte.
constexpr bool IsNumberPart(uint8_t flags) {
return NumberPartField::decode(flags);
}
// Computes the scan-flags byte for the one-byte character |c| by OR-ing
// together three encoded fields:
//   - the EscapeKind associated with |c|,
//   - whether |c| may terminate a string scan (control characters below
//     0x20, the double quote and the backslash),
//   - whether |c| may be part of a number ('.', 'e', 'E', '+', '-' or a
//     decimal digit).
// Kept as one return expression so it stays usable in constant initializers.
constexpr uint8_t GetScanFlags(uint8_t c) {
  // clang-format off
  return EscapeKindField::encode(
             c == 'b' ? EscapeKind::kBackspace
           : c == 't' ? EscapeKind::kTab
           : c == 'n' ? EscapeKind::kNewLine
           : c == 'f' ? EscapeKind::kFormFeed
           : c == 'r' ? EscapeKind::kCarriageReturn
           : c == 'u' ? EscapeKind::kUnicode
           : (c == '"' || c == '\\' || c == '/') ? EscapeKind::kSelf
           : EscapeKind::kIllegal) |
         MayTerminateStringField::encode(c < 0x20 ||
                                         c == '"' ||
                                         c == '\\') |
         NumberPartField::encode(IsDecimalDigit(c) ||
                                 c == '.' ||
                                 c == 'e' ||
                                 c == 'E' ||
                                 c == '-' ||
                                 c == '+');
  // clang-format on
}
// Table of one-character scan flags, by character (0x00..0xFF only).
static const constexpr uint8_t character_scan_flags[256] = {
#define CALL_GET_SCAN_FLAGS(N) GetScanFlags(N),
INT_0_TO_127_LIST(CALL_GET_SCAN_FLAGS)
#undef CALL_GET_SCAN_FLAGS
#define CALL_GET_SCAN_FLAGS(N) GetScanFlags(128 + N),
INT_0_TO_127_LIST(CALL_GET_SCAN_FLAGS)
#undef CALL_GET_SCAN_FLAGS
};
} // namespace
MaybeHandle<Object> JsonParseInternalizer::Internalize(Isolate* isolate,
......@@ -138,16 +233,15 @@ template <typename Char>
JsonParser<Char>::JsonParser(Isolate* isolate, Handle<String> source)
: isolate_(isolate),
zone_(isolate_->allocator(), ZONE_NAME),
hash_seed_(HashSeed(isolate)),
object_constructor_(isolate_->object_function()),
offset_(0),
length_(source->length()),
position_(-1),
original_source_(source),
properties_(&zone_) {
size_t start = 0;
size_t length = source->length();
if (source->IsSlicedString()) {
SlicedString string = SlicedString::cast(*source);
offset_ = string.offset();
length_ += offset_;
position_ += offset_;
start = string.offset();
String parent = string.parent();
if (parent.IsThinString()) parent = ThinString::cast(parent).actual();
source_ = handle(parent, isolate);
......@@ -158,17 +252,66 @@ JsonParser<Char>::JsonParser(Isolate* isolate, Handle<String> source)
if (StringShape(*source_).IsExternal()) {
chars_ =
static_cast<const Char*>(SeqExternalString::cast(*source_)->GetChars());
chars_may_relocate_ = false;
} else {
DisallowHeapAllocation no_gc;
isolate->heap()->AddGCEpilogueCallback(UpdatePointersCallback,
v8::kGCTypeAll, this);
chars_ = SeqString::cast(*source_)->GetChars(no_gc);
chars_may_relocate_ = true;
}
cursor_ = chars_ + start;
end_ = cursor_ + length;
allocation_ = (source->length() >= kPretenureTreshold)
? AllocationType::kOld
: AllocationType::kYoung;
}
// Throws a SyntaxError for the unexpected |token| at the current position,
// unless an exception is already pending on the isolate, in which case this
// is a no-op. The message template is chosen from |token|; for tokens other
// than EOS, NUMBER, NEGATIVE_NUMBER and STRING the offending character (read
// from |cursor_|) and the position are passed as message arguments. After
// throwing, |cursor_| is set to |end_|.
template <typename Char>
void JsonParser<Char>::ReportUnexpectedCharacter(JsonToken token) {
// Some exception (for example stack overflow) is already pending.
if (isolate_->has_pending_exception()) return;
// Parse failed. Current character is the unexpected token.
Factory* factory = this->factory();
MessageTemplate message;
// By default the only message argument is the current position.
Handle<Object> arg1 = Handle<Smi>(Smi::FromInt(position()), isolate());
Handle<Object> arg2;
switch (token) {
case JsonToken::EOS:
message = MessageTemplate::kJsonParseUnexpectedEOS;
break;
case JsonToken::NUMBER:
case JsonToken::NEGATIVE_NUMBER:
message = MessageTemplate::kJsonParseUnexpectedTokenNumber;
break;
case JsonToken::STRING:
message = MessageTemplate::kJsonParseUnexpectedTokenString;
break;
default:
// Generic message: the first argument becomes the offending character and
// the position moves to the second argument.
message = MessageTemplate::kJsonParseUnexpectedToken;
arg2 = arg1;
arg1 = factory->LookupSingleCharacterStringFromCode(*cursor_);
break;
}
// The script is created from the original source string so that the error
// location below refers to it.
Handle<Script> script(factory->NewScript(original_source_));
if (isolate()->NeedsSourcePositionsForProfiling()) {
Script::InitLineEnds(script);
}
// We should send a compile error event because we compile the JSON object in
// a separate source file.
isolate()->debug()->OnCompileError(script);
MessageLocation location(script, position(), position() + 1);
Handle<Object> error = factory->NewSyntaxError(message, arg1, arg2);
isolate()->Throw(*error, &location);
// Move the cursor to the end so we won't be able to proceed parsing.
cursor_ = end_;
}
template <typename Char>
JsonParser<Char>::~JsonParser() {
if (StringShape(*source_).IsExternal()) {
......@@ -185,57 +328,9 @@ JsonParser<Char>::~JsonParser() {
template <typename Char>
MaybeHandle<Object> JsonParser<Char>::ParseJson() {
// Advance to the first character (possibly EOS)
AdvanceSkipWhitespace();
Handle<Object> result = ParseJsonValue();
if (result.is_null() || c0_ != kEndOfString) {
// Some exception (for example stack overflow) is already pending.
if (isolate_->has_pending_exception()) return Handle<Object>::null();
// Parse failed. Current character is the unexpected token.
Factory* factory = this->factory();
MessageTemplate message;
Handle<Object> arg1 = Handle<Smi>(Smi::FromInt(position_), isolate());
Handle<Object> arg2;
switch (c0_) {
case kEndOfString:
message = MessageTemplate::kJsonParseUnexpectedEOS;
break;
case '-':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
message = MessageTemplate::kJsonParseUnexpectedTokenNumber;
break;
case '"':
message = MessageTemplate::kJsonParseUnexpectedTokenString;
break;
default:
message = MessageTemplate::kJsonParseUnexpectedToken;
arg2 = arg1;
arg1 = factory->LookupSingleCharacterStringFromCode(c0_);
break;
}
Handle<Script> script(factory->NewScript(source_));
if (isolate()->NeedsSourcePositionsForProfiling()) {
Script::InitLineEnds(script);
}
// We should sent compile error event because we compile JSON object in
// separated source file.
isolate()->debug()->OnCompileError(script);
MessageLocation location(script, position_, position_ + 1);
Handle<Object> error = factory->NewSyntaxError(message, arg1, arg2);
return isolate()->template Throw<Object>(error, &location);
}
if (!Check(JsonToken::EOS)) ReportUnexpectedCharacter(peek());
if (isolate_->has_pending_exception()) return MaybeHandle<Object>();
return result;
}
......@@ -243,163 +338,111 @@ MaybeHandle<Object> InternalizeJsonProperty(Handle<JSObject> holder,
Handle<String> key);
// Steps to the next input position and refreshes |c0_| with the character
// found there, or with kEndOfString once the position reaches |length_|.
template <typename Char>
void JsonParser<Char>::Advance() {
  ++position_;
  if (position_ < length_) {
    c0_ = chars_[position_];
    return;
  }
  // Ran off the end of the input.
  c0_ = kEndOfString;
}
template <typename Char>
void JsonParser<Char>::AdvanceSkipWhitespace() {
do {
Advance();
} while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r');
Char JsonParser<Char>::NextCharacter() {
advance();
if (V8_UNLIKELY(is_at_end())) return kEndOfString;
return *cursor_;
}
template <typename Char>
void JsonParser<Char>::SkipWhitespace() {
while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') {
Advance();
}
}
template <typename Char>
uc32 JsonParser<Char>::AdvanceGetChar() {
Advance();
return c0_;
}
template <typename Char>
bool JsonParser<Char>::MatchSkipWhiteSpace(uc32 c) {
if (c0_ == c) {
AdvanceSkipWhitespace();
return true;
}
return false;
}
template <typename Char>
bool JsonParser<Char>::ParseJsonString(Handle<String> expected) {
int length = expected->length();
if (source_->length() - position_ - 1 > length) {
DisallowHeapAllocation no_gc;
String::FlatContent content = expected->GetFlatContent(no_gc);
DCHECK_EQ('"', c0_);
if (content.IsOneByte()) {
const Char* input_chars = chars_ + position_ + 1;
const uint8_t* expected_chars = content.ToOneByteVector().begin();
for (int i = 0; i < length; i++) {
Char c0 = input_chars[i];
if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') {
return false;
}
}
if (input_chars[length] == '"') {
position_ = position_ + length + 1;
AdvanceSkipWhitespace();
return true;
}
} else {
const Char* input_chars = chars_ + position_ + 1;
const uint16_t* expected_chars = content.ToUC16Vector().begin();
for (int i = 0; i < length; i++) {
Char c0 = input_chars[i];
if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') {
return false;
}
}
if (input_chars[length] == '"') {
position_ = position_ + length + 1;
AdvanceSkipWhitespace();
return true;
}
}
}
return false;
next_ = JsonToken::EOS;
cursor_ = std::find_if(cursor_, end_, [this](Char c) {
JsonToken current = V8_LIKELY(c <= unibrow::Latin1::kMaxChar)
? one_char_tokens[c]
: JsonToken::ILLEGAL;
bool result = current != JsonToken::WHITESPACE;
if (result) next_ = current;
return result;
});
}
// Parse any JSON value.
template <typename Char>
Handle<Object> JsonParser<Char>::ParseJsonValue() {
StackLimitCheck stack_check(isolate_);
if (stack_check.HasOverflowed()) {
isolate_->StackOverflow();
return Handle<Object>::null();
}
if (stack_check.InterruptRequested() &&
isolate_->stack_guard()->HandleInterrupts()->IsException(isolate_)) {
return Handle<Object>::null();
}
if (V8_UNLIKELY(stack_check.InterruptRequested())) {
if (stack_check.HasOverflowed()) {
if (!isolate_->has_pending_exception()) isolate_->StackOverflow();
return factory()->undefined_value();
}
if (c0_ == '"') return ParseJsonString();
if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
if (c0_ == '{') return ParseJsonObject();
if (c0_ == '[') return ParseJsonArray();
if (c0_ == 'f') {
if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
AdvanceSkipWhitespace();
return factory()->false_value();
if (isolate_->stack_guard()->HandleInterrupts()->IsException(isolate_)) {
return factory()->undefined_value();
}
return ReportUnexpectedCharacter();
}
if (c0_ == 't') {
if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
AdvanceGetChar() == 'e') {
AdvanceSkipWhitespace();
SkipWhitespace();
switch (peek()) {
case JsonToken::STRING:
Consume(JsonToken::STRING);
return ParseJsonString(false);
case JsonToken::NUMBER:
return ParseJsonNumber(1, cursor_);
case JsonToken::NEGATIVE_NUMBER:
return ParseJsonNumber(-1, cursor_++);
case JsonToken::LBRACE:
return ParseJsonObject();
case JsonToken::LBRACK:
return ParseJsonArray();
case JsonToken::TRUE_LITERAL:
ScanLiteral("true");
return factory()->true_value();
}
return ReportUnexpectedCharacter();
}
if (c0_ == 'n') {
if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
AdvanceGetChar() == 'l') {
AdvanceSkipWhitespace();
case JsonToken::FALSE_LITERAL:
ScanLiteral("false");
return factory()->false_value();
case JsonToken::NULL_LITERAL:
ScanLiteral("null");
return factory()->null_value();
}
return ReportUnexpectedCharacter();
case JsonToken::COLON:
case JsonToken::COMMA:
case JsonToken::ILLEGAL:
case JsonToken::RBRACE:
case JsonToken::RBRACK:
case JsonToken::EOS:
ReportUnexpectedCharacter(peek());
return factory()->undefined_value();
case JsonToken::WHITESPACE:
UNREACHABLE();
}
return ReportUnexpectedCharacter();
}
template <typename Char>
ParseElementResult JsonParser<Char>::ParseElement(
Handle<JSObject> json_object) {
bool JsonParser<Char>::ParseElement(Handle<JSObject> json_object) {
uint32_t index = 0;
// Maybe an array index, try to parse it.
if (c0_ == '0') {
// With a leading zero, the string has to be "0" only to be an index.
Advance();
} else {
do {
int d = c0_ - '0';
if (index > 429496729U - ((d + 3) >> 3)) break;
index = (index * 10) + d;
Advance();
} while (IsDecimalDigit(c0_));
}
if (c0_ == '"') {
// Successfully parsed index, parse and store element.
AdvanceSkipWhitespace();
{
// |cursor_| will only be updated if the key ends up being an index.
DisallowHeapAllocation no_gc;
const Char* cursor = cursor_;
// Maybe an array index, try to parse it.
if (*cursor == '0') {
// With a leading zero, the string has to be "0" only to be an index.
cursor++;
} else {
cursor = std::find_if(cursor, end_, [&index](Char c) {
return !TryAddIndexChar(&index, c);
});
}
if (c0_ == ':') {
AdvanceSkipWhitespace();
Handle<Object> value = ParseJsonValue();
if (!value.is_null()) {
JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE)
.Assert();
return kElementFound;
} else {
return kNullHandle;
}
if (V8_UNLIKELY(cursor == end_)) {
ReportUnexpectedCharacter(JsonToken::EOS);
return true;
}
if (*cursor++ != '"') return false;
cursor_ = cursor;
}
return kElementNotFound;
ExpectNext(JsonToken::COLON);
Handle<Object> value = ParseJsonValue();
JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE)
.Assert();
return true;
}
// Parse a JSON object. Position must be right at '{'.
......@@ -411,66 +454,47 @@ Handle<Object> JsonParser<Char>::ParseJsonObject() {
Handle<Map> map(json_object->map(), isolate());
int descriptor = 0;
VectorSegment<ZoneVector<Handle<Object>>> properties(&properties_);
DCHECK_EQ(c0_, '{');
Consume(JsonToken::LBRACE);
bool transitioning = true;
AdvanceSkipWhitespace();
if (c0_ != '}') {
if (!Check(JsonToken::RBRACE)) {
do {
if (c0_ != '"') return ReportUnexpectedCharacter();
int start_position = position_;
Advance();
if (IsDecimalDigit(c0_)) {
ParseElementResult element_result = ParseElement(json_object);
if (element_result == kNullHandle) return Handle<Object>::null();
if (element_result == kElementFound) continue;
ExpectNext(JsonToken::STRING);
if (is_at_end() ||
(IsDecimalDigit(*cursor_) && ParseElement(json_object))) {
continue;
}
// Not an index, fallback to the slow path.
position_ = start_position;
#ifdef DEBUG
c0_ = '"';
#endif
Handle<String> key;
Handle<Object> value;
// Try to follow existing transitions as long as possible. Once we stop
// transitioning, no transition can be found anymore.
DCHECK(transitioning);
Handle<Map> target;
// First check whether there is a single expected transition. If so, try
// to parse it first.
bool follow_expected = false;
Handle<Map> target;
if (kIsOneByte) {
Handle<String> expected;
{
DisallowHeapAllocation no_gc;
TransitionsAccessor transitions(isolate(), *map, &no_gc);
key = transitions.ExpectedTransitionKey();
follow_expected = !key.is_null() && ParseJsonString(key);
// If the expected transition hits, follow it.
if (follow_expected) {
target = transitions.ExpectedTransitionTarget();
}
expected = transitions.ExpectedTransitionKey();
}
if (!follow_expected) {
// If the expected transition failed, parse an internalized string and
// try to find a matching transition.
key = ParseJsonString();
if (key.is_null()) return ReportUnexpectedCharacter();
Handle<String> key = ParseJsonString(true, expected);
// If the expected transition hits, follow it.
if (key.is_identical_to(expected)) {
DisallowHeapAllocation no_gc;
target = TransitionsAccessor(isolate(), *map, &no_gc)
.ExpectedTransitionTarget();
} else {
// If a transition was found, follow it and continue.
transitioning = TransitionsAccessor(isolate(), map)
.FindTransitionToField(key)
.ToHandle(&target);
}
if (c0_ != ':') return ReportUnexpectedCharacter();
AdvanceSkipWhitespace();
value = ParseJsonValue();
if (value.is_null()) return ReportUnexpectedCharacter();
ExpectNext(JsonToken::COLON);
Handle<Object> value = ParseJsonValue();
if (transitioning) {
PropertyDetails details =
......@@ -507,40 +531,23 @@ Handle<Object> JsonParser<Char>::ParseJsonObject() {
JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value)
.Check();
} while (transitioning && MatchSkipWhiteSpace(','));
} while (transitioning && Check(JsonToken::COMMA));
// If we transitioned until the very end, transition the map now.
if (transitioning) {
CommitStateToJsonObject(json_object, map, properties.GetVector());
} else {
while (MatchSkipWhiteSpace(',')) {
while (Check(JsonToken::COMMA)) {
HandleScope local_scope(isolate());
if (c0_ != '"') return ReportUnexpectedCharacter();
int start_position = position_;
Advance();
if (IsDecimalDigit(c0_)) {
ParseElementResult element_result = ParseElement(json_object);
if (element_result == kNullHandle) return Handle<Object>::null();
if (element_result == kElementFound) continue;
ExpectNext(JsonToken::STRING);
if (is_at_end() ||
(IsDecimalDigit(*cursor_) && ParseElement(json_object))) {
continue;
}
// Not an index, fallback to the slow path.
position_ = start_position;
#ifdef DEBUG
c0_ = '"';
#endif
Handle<String> key;
Handle<Object> value;
key = ParseJsonString();
if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
AdvanceSkipWhitespace();
value = ParseJsonValue();
if (value.is_null()) return ReportUnexpectedCharacter();
Handle<String> key = ParseJsonString(true);
ExpectNext(JsonToken::COLON);
Handle<Object> value = ParseJsonValue();
JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key,
value)
......@@ -548,18 +555,15 @@ Handle<Object> JsonParser<Char>::ParseJsonObject() {
}
}
if (c0_ != '}') {
return ReportUnexpectedCharacter();
}
Expect(JsonToken::RBRACE);
}
AdvanceSkipWhitespace();
return scope.CloseAndEscape(json_object);
}
template <typename Char>
void JsonParser<Char>::CommitStateToJsonObject(
Handle<JSObject> json_object, Handle<Map> map,
Vector<const Handle<Object>> properties) {
const Vector<const Handle<Object>>& properties) {
JSObject::AllocateStorageForMap(json_object, map);
DCHECK(!json_object->map()->is_dictionary_map());
......@@ -574,10 +578,10 @@ void JsonParser<Char>::CommitStateToJsonObject(
class ElementKindLattice {
private:
enum {
SMI_ELEMENTS,
NUMBER_ELEMENTS,
OBJECT_ELEMENTS,
enum Kind {
SMI_ELEMENTS = 0,
NUMBER_ELEMENTS = 1,
OBJECT_ELEMENTS = (1 << 1) | NUMBER_ELEMENTS,
};
public:
......@@ -587,9 +591,8 @@ class ElementKindLattice {
if (o->IsSmi()) {
return;
} else if (o->IsHeapNumber()) {
if (value_ < NUMBER_ELEMENTS) value_ = NUMBER_ELEMENTS;
value_ = static_cast<Kind>(value_ | NUMBER_ELEMENTS);
} else {
DCHECK(!o->IsNumber());
value_ = OBJECT_ELEMENTS;
}
}
......@@ -602,38 +605,30 @@ class ElementKindLattice {
return PACKED_DOUBLE_ELEMENTS;
case OBJECT_ELEMENTS:
return PACKED_ELEMENTS;
default:
UNREACHABLE();
return PACKED_ELEMENTS;
}
}
private:
int value_;
Kind value_;
};
// Parse a JSON array. Position must be right at '['.
template <typename Char>
Handle<Object> JsonParser<Char>::ParseJsonArray() {
HandleScope scope(isolate());
ZoneVector<Handle<Object>> elements(zone());
DCHECK_EQ(c0_, '[');
ZoneVector<Handle<Object>> elements(&zone_);
Consume(JsonToken::LBRACK);
ElementKindLattice lattice;
AdvanceSkipWhitespace();
if (c0_ != ']') {
if (!Check(JsonToken::RBRACK)) {
do {
Handle<Object> element = ParseJsonValue();
if (element.is_null()) return ReportUnexpectedCharacter();
elements.push_back(element);
lattice.Update(element);
} while (MatchSkipWhiteSpace(','));
if (c0_ != ']') {
return ReportUnexpectedCharacter();
}
} while (Check(JsonToken::COMMA));
Expect(JsonToken::RBRACK);
}
AdvanceSkipWhitespace();
// Allocate a fixed array with all the elements.
......@@ -667,343 +662,309 @@ Handle<Object> JsonParser<Char>::ParseJsonArray() {
}
template <typename Char>
Handle<Object> JsonParser<Char>::ParseJsonNumber() {
bool negative = false;
int beg_pos = position_;
if (c0_ == '-') {
Advance();
negative = true;
}
if (c0_ == '0') {
Advance();
// Prefix zero is only allowed if it's the only digit before
// a decimal point or exponent.
if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
} else {
uint32_t i = 0;
int digits = 0;
if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
do {
// This can overflow. That's OK, the "digits < 10" check below
// will discard overflown results.
i = i * 10 + c0_ - '0';
digits++;
Advance();
} while (IsDecimalDigit(c0_));
if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
SkipWhitespace();
return Handle<Smi>(Smi::FromInt((negative ? -static_cast<int>(i) : i)),
isolate());
}
}
if (c0_ == '.') {
Advance();
if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
do {
Advance();
} while (IsDecimalDigit(c0_));
}
if (AsciiAlphaToLower(c0_) == 'e') {
Advance();
if (c0_ == '-' || c0_ == '+') Advance();
if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
do {
Advance();
} while (IsDecimalDigit(c0_));
}
int length = position_ - beg_pos;
Handle<Object> JsonParser<Char>::ParseJsonNumber(int sign, const Char* start) {
double number;
if (kIsOneByte) {
{
DisallowHeapAllocation no_gc;
Vector<const Char> chars(chars_ + beg_pos, length);
number = StringToDouble(Vector<const uint8_t>::cast(chars),
if (*cursor_ == '0') {
// Prefix zero is only allowed if it's the only digit before
// a decimal point or exponent.
Char c = NextCharacter();
if (c <= unibrow::Latin1::kMaxChar &&
IsNumberPart(character_scan_flags[c])) {
if (V8_UNLIKELY(IsDecimalDigit(c))) {
AllowHeapAllocation allow_before_exception;
ReportUnexpectedCharacter();
return handle(Smi::FromInt(0), isolate_);
}
} else if (sign > 0) {
return handle(Smi::FromInt(0), isolate_);
}
} else {
int32_t i = 0;
int digits = 0;
const Char* start = cursor_;
const int kMaxSmiLength = 9;
cursor_ = std::find_if(cursor_, Min(end_, cursor_ + kMaxSmiLength),
[&i, &digits](Char c) {
if (!IsDecimalDigit(c)) return true;
i = i * 10 + (c - '0');
digits++;
return false;
});
if (V8_UNLIKELY(cursor_ == start)) {
AllowHeapAllocation allow_before_exception;
ReportUnexpectedCharacter();
return handle(Smi::FromInt(0), isolate_);
}
if (is_at_end() || *cursor_ > unibrow::Latin1::kMaxChar ||
!IsNumberPart(character_scan_flags[*cursor_])) {
// Smi.
// TODO(verwaest): Cache?
return handle(Smi::FromInt(i * sign), isolate_);
}
}
cursor_ = std::find_if(cursor_, end_, [](Char c) {
return !(c <= unibrow::Latin1::kMaxChar &&
IsNumberPart(character_scan_flags[c])) ||
c == '.';
});
// If we found a period, ensure that it's followed by a decimal digit.
if (!is_at_end() && *cursor_ == '.') {
advance();
if (is_at_end()) {
AllowHeapAllocation allow_before_exception;
ReportUnexpectedCharacter(JsonToken::EOS);
return handle(Smi::FromInt(0), isolate_);
} else if (!IsDecimalDigit(*cursor_)) {
AllowHeapAllocation allow_before_exception;
ReportUnexpectedCharacter();
return handle(Smi::FromInt(0), isolate_);
} else {
cursor_ = std::find_if(cursor_, end_, [](Char c) {
return !(c <= unibrow::Latin1::kMaxChar &&
IsNumberPart(character_scan_flags[c]));
});
}
}
// Build a one-byte view of the scanned number text [start, cursor_) for
// StringToDouble.
Vector<const uint8_t> chars;
if (kIsOneByte) {
  // One-byte input can be viewed in place without copying.
  chars = Vector<const uint8_t>::cast(
      Vector<const Char>(start, cursor_ - start));
} else {
  // Two-byte input: copy each code unit into the literal buffer so the
  // number can be handed over as a one-byte literal. The loop must advance
  // |start| exactly once per iteration; incrementing it in both the
  // condition and the body would drop the first code unit, copy only every
  // other one, and could step past cursor_ for odd lengths.
  literal_buffer_.Start();
  while (start != cursor_) {
    literal_buffer_.AddChar(*start++);
  }
  chars = literal_buffer_.one_byte_literal();
}
number = StringToDouble(chars,
NO_FLAGS, // Hex, octal or trailing junk.
std::numeric_limits<double>::quiet_NaN());
} else {
Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
String::WriteToFlat(*source_, buffer.begin(), beg_pos, position_);
Vector<const uint8_t> result =
Vector<const uint8_t>(buffer.begin(), length);
number = StringToDouble(result,
NO_FLAGS, // Hex, octal or trailing junk.
0.0);
buffer.Dispose();
}
SkipWhitespace();
if (V8_UNLIKELY(std::isnan(number))) ReportUnexpectedCharacter();
return factory()->NewNumber(number, allocation_);
}
template <typename StringType>
inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
template <>
inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
seq_str->SeqTwoByteStringSet(i, c);
}
namespace {
template <>
inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) {
seq_str->SeqOneByteStringSet(i, c);
}
template <typename Char>
bool Matches(const Vector<const Char>& chars, Handle<String> string) {
if (string.is_null()) return false;
template <typename StringType>
inline Handle<StringType> NewRawString(Factory* factory, int length,
AllocationType allocation);
// Only supports internalized strings in their canonical representation (one
// byte encoded as two-byte will return false here).
if ((sizeof(Char) == 1) != string->IsOneByteRepresentation()) return false;
if (chars.length() != string->length()) return false;
template <>
inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length,
AllocationType allocation) {
return factory->NewRawTwoByteString(length, allocation).ToHandleChecked();
DisallowHeapAllocation no_gc;
const Char* string_data = string->GetChars<Char>(no_gc);
return CompareChars(chars.begin(), string_data, chars.length()) == 0;
}
template <>
inline Handle<SeqOneByteString> NewRawString(Factory* factory, int length,
AllocationType allocation) {
return factory->NewRawOneByteString(length, allocation).ToHandleChecked();
}
} // namespace
// Scans the rest of a JSON string starting from position_ and writes
// prefix[start..end] along with the scanned characters into a
// sequential string of type StringType.
template <typename Char>
template <typename StringType, typename SinkChar>
Handle<String> JsonParser<Char>::SlowScanJsonString(Handle<String> prefix,
int start, int end) {
int count = end - start;
int max_length = count + length_ - position_;
int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
Handle<StringType> seq_string =
NewRawString<StringType>(factory(), length, allocation_);
{
Handle<String> JsonParser<Char>::MakeString(bool requires_internalization,
int offset, int length) {
AllowHeapAllocation allow_gc;
DCHECK(chars_may_relocate_);
Handle<SeqOneByteString> source = Handle<SeqOneByteString>::cast(source_);
if (!requires_internalization && length > kMaxInternalizedStringValueLength) {
Handle<SeqOneByteString> result =
factory()->NewRawOneByteString(length).ToHandleChecked();
DisallowHeapAllocation no_gc;
// Copy prefix into seq_str.
SinkChar* dest = seq_string->GetChars(no_gc);
String::WriteToFlat(*prefix, dest, start, end);
uint8_t* d = result->GetChars(no_gc);
uint8_t* s = source->GetChars(no_gc) + offset;
MemCopy(d, s, length);
return result;
}
while (c0_ != '"') {
// Check for control character (0x00-0x1F) or unterminated string (<0).
if (c0_ < 0x20) return Handle<String>::null();
if (count >= length) {
// We need to create a longer sequential string for the result.
return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count);
}
if (c0_ != '\\') {
// If the sink can contain UC16 characters, or source_ contains only
// Latin1 characters, there's no need to test whether we can store the
// character. Otherwise check whether the UC16 source character can fit
// in the Latin1 sink.
if (sizeof(SinkChar) == kUC16Size || kIsOneByte ||
c0_ <= String::kMaxOneByteCharCode) {
SeqStringSet(seq_string, count++, c0_);
Advance();
} else {
// StringType is SeqOneByteString and we just read a non-Latin1 char.
return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count);
}
} else {
Advance(); // Advance past the \.
switch (c0_) {
case '"':
case '\\':
case '/':
SeqStringSet(seq_string, count++, c0_);
break;
case 'b':
SeqStringSet(seq_string, count++, '\x08');
break;
case 'f':
SeqStringSet(seq_string, count++, '\x0C');
break;
case 'n':
SeqStringSet(seq_string, count++, '\x0A');
break;
case 'r':
SeqStringSet(seq_string, count++, '\x0D');
break;
case 't':
SeqStringSet(seq_string, count++, '\x09');
break;
case 'u': {
uc32 value = 0;
for (int i = 0; i < 4; i++) {
Advance();
int digit = HexValue(c0_);
if (digit < 0) {
return Handle<String>::null();
}
value = value * 16 + digit;
}
if (sizeof(SinkChar) == kUC16Size ||
value <= String::kMaxOneByteCharCode) {
SeqStringSet(seq_string, count++, value);
break;
} else {
// StringType is SeqOneByteString and we just read a non-Latin1
// char.
position_ -= 6; // Rewind position_ to \ in \uxxxx.
Advance();
return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0,
count);
}
}
default:
return Handle<String>::null();
}
Advance();
return factory()->InternalizeOneByteString(source, offset, length);
}
template <typename Char>
template <typename LiteralChar>
Handle<String> JsonParser<Char>::MakeString(
bool requires_internalization, const Vector<const LiteralChar>& chars) {
AllowHeapAllocation allow_gc;
DCHECK_IMPLIES(
chars_may_relocate_,
chars.begin() == literal_buffer_.literal<LiteralChar>().begin());
if (!requires_internalization &&
chars.length() > kMaxInternalizedStringValueLength) {
if (sizeof(LiteralChar) == 1) {
return factory()
->NewStringFromOneByte(Vector<const uint8_t>::cast(chars),
allocation_)
.ToHandleChecked();
}
return factory()
->NewStringFromTwoByte(Vector<const uint16_t>::cast(chars), allocation_)
.ToHandleChecked();
}
DCHECK_EQ('"', c0_);
// Advance past the last '"'.
AdvanceSkipWhitespace();
// Shrink seq_string length to count and return.
return SeqString::Truncate(seq_string, count);
SequentialStringKey<LiteralChar> key(chars, hash_seed_);
return StringTable::LookupKey(isolate_, &key);
}
template <typename Char>
Handle<String> JsonParser<Char>::ScanJsonString() {
DCHECK_EQ('"', c0_);
Advance();
if (c0_ == '"') {
AdvanceSkipWhitespace();
return factory()->empty_string();
}
Handle<String> JsonParser<Char>::ParseJsonString(bool requires_internalization,
Handle<String> hint) {
// First try to fast scan without buffering in case the string doesn't have
// escaped sequences. Always buffer two-byte input strings as the scanned
// substring can be one-byte.
if (kIsOneByte) {
// Fast path for existing internalized strings. If the the string being
// parsed is not a known internalized string, contains backslashes or
// unexpectedly reaches the end of string, return with an empty handle.
DisallowHeapAllocation no_gc;
const Char* start = cursor_;
// We intentionally use local variables instead of fields, compute hash
// while we are iterating a string and manually inline StringTable lookup
// here.
while (true) {
cursor_ = std::find_if(cursor_, end_, [](Char c) {
return MayTerminateString(character_scan_flags[c]);
});
if (V8_UNLIKELY(is_at_end())) break;
if (*cursor_ == '"') {
Handle<String> result;
Vector<const Char> chars(start, cursor_ - start);
if (Matches(chars, hint)) {
result = hint;
} else if (chars_may_relocate_) {
result = MakeString(requires_internalization,
static_cast<int>(start - chars_),
static_cast<int>(cursor_ - start));
} else {
result = MakeString(requires_internalization,
Vector<const uint8_t>::cast(chars));
}
advance();
return result;
}
int position = position_;
uc32 c0 = c0_;
uint32_t running_hash = static_cast<uint32_t>(HashSeed(isolate()));
uint32_t index = 0;
bool is_array_index = true;
if (*cursor_ == '\\') break;
do {
if (c0 == '\\') {
c0_ = c0;
int beg_pos = position_;
position_ = position;
return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, beg_pos,
position_);
}
if (c0 < 0x20) {
c0_ = c0;
position_ = position;
return Handle<String>::null();
DCHECK_LT(*cursor_, 0x20);
AllowHeapAllocation allow_before_exception;
ReportUnexpectedCharacter();
return factory()->empty_string();
}
// We hit an escape sequence. Start buffering.
// TODO(verwaest): MemCopy.
literal_buffer_.Start();
while (start != cursor_) {
literal_buffer_.AddChar(*start++);
}
} else {
literal_buffer_.Start();
}
while (true) {
cursor_ = std::find_if(cursor_, end_, [this](Char c) {
if (V8_UNLIKELY(c > unibrow::Latin1::kMaxChar)) {
AddLiteralChar(c);
return false;
}
if (is_array_index) {
// With leading zero, the string has to be "0" to be a valid index.
if (!IsDecimalDigit(c0) || (position > position_ && index == 0)) {
is_array_index = false;
} else {
int d = c0 - '0';
is_array_index = index <= 429496729U - ((d + 3) >> 3);
index = (index * 10) + d;
}
if (MayTerminateString(character_scan_flags[c])) {
return true;
}
running_hash = StringHasher::AddCharacterCore(running_hash,
static_cast<uint16_t>(c0));
position++;
if (position >= length_) {
c0_ = kEndOfString;
position_ = position;
return Handle<String>::null();
AddLiteralChar(c);
return false;
});
if (V8_UNLIKELY(is_at_end())) break;
if (*cursor_ == '"') {
Handle<String> result;
if (literal_buffer_.is_one_byte()) {
Vector<const uint8_t> chars = literal_buffer_.one_byte_literal();
result = Matches(chars, hint)
? hint
: MakeString(requires_internalization, chars);
} else {
Vector<const uint16_t> chars = literal_buffer_.two_byte_literal();
result = Matches(chars, hint)
? hint
: MakeString(requires_internalization, chars);
}
c0 = chars_[position];
} while (c0 != '"');
int length = position - position_;
uint32_t hash;
if (is_array_index) {
hash =
StringHasher::MakeArrayIndexHash(index, length) >> String::kHashShift;
} else if (length <= String::kMaxHashCalcLength) {
hash = StringHasher::GetHashCore(running_hash);
} else {
hash = static_cast<uint32_t>(length);
advance();
return result;
}
StringTable string_table = isolate()->heap()->string_table();
uint32_t capacity = string_table->Capacity();
uint32_t entry = StringTable::FirstProbe(hash, capacity);
uint32_t count = 1;
Handle<String> result;
while (true) {
Object element = string_table->KeyAt(entry);
if (element->IsUndefined(isolate())) {
// Lookup failure.
result = Internalize(position_, length);
break;
if (*cursor_ == '\\') {
uc32 c = NextCharacter();
if (V8_UNLIKELY(c > unibrow::Latin1::kMaxChar)) {
ReportUnexpectedCharacter();
return factory()->empty_string();
}
if (!element->IsTheHole(isolate())) {
DisallowHeapAllocation no_gc;
Vector<const Char> string_vector(chars_ + position_, length);
if (String::cast(element)->IsOneByteEqualTo(
Vector<const uint8_t>::cast(string_vector))) {
result = Handle<String>(String::cast(element), isolate());
DCHECK_EQ(result->Hash(),
(hash << String::kHashShift) >> String::kHashShift);
uc32 value;
switch (GetEscapeKind(character_scan_flags[c])) {
case EscapeKind::kSelf:
value = c;
break;
case EscapeKind::kBackspace:
value = '\x08';
break;
case EscapeKind::kTab:
value = '\x09';
break;
case EscapeKind::kNewLine:
value = '\x0A';
break;
case EscapeKind::kFormFeed:
value = '\x0C';
break;
case EscapeKind::kCarriageReturn:
value = '\x0D';
break;
case EscapeKind::kUnicode: {
value = 0;
for (int i = 0; i < 4; i++) {
int digit = HexValue(NextCharacter());
if (V8_UNLIKELY(digit < 0)) {
ReportUnexpectedCharacter();
return factory()->empty_string();
}
value = value * 16 + digit;
}
break;
}
}
entry = StringTable::NextProbe(entry, count++, capacity);
}
position_ = position;
// Advance past the last '"'.
AdvanceSkipWhitespace();
return result;
}
int beg_pos = position_;
// Fast case for Latin1 only without escape characters.
do {
// Check for control character (0x00-0x1F) or unterminated string (<0).
if (c0_ < 0x20) return Handle<String>::null();
if (c0_ != '\\') {
if (kIsOneByte || c0_ <= String::kMaxOneByteCharCode) {
Advance();
} else {
return SlowScanJsonString<SeqTwoByteString, uc16>(source_, beg_pos,
position_);
case EscapeKind::kIllegal:
ReportUnexpectedCharacter();
return factory()->empty_string();
}
} else {
return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, beg_pos,
position_);
}
} while (c0_ != '"');
int length = position_ - beg_pos;
Handle<String> result =
factory()->NewRawOneByteString(length, allocation_).ToHandleChecked();
DisallowHeapAllocation no_gc;
uint8_t* dest = SeqOneByteString::cast(*result)->GetChars(no_gc);
String::WriteToFlat(*source_, dest, beg_pos, position_);
DCHECK_EQ('"', c0_);
// Advance past the last '"'.
AdvanceSkipWhitespace();
return result;
}
AddLiteralChar(value);
advance();
continue;
}
template <>
Handle<String> JsonParser<uint8_t>::Internalize(int start, int length) {
if (StringShape(*source_).IsExternal()) {
return factory()->InternalizeOneByteString(
Vector<const uint8_t>(chars_ + start, length));
DCHECK_LT(*cursor_, 0x20);
AllowHeapAllocation allow_before_exception;
ReportUnexpectedCharacter();
return factory()->empty_string();
}
Handle<SeqString> seq = Handle<SeqString>::cast(source_);
return factory()->InternalizeOneByteString(seq, start, length);
}
template <>
Handle<String> JsonParser<uint16_t>::Internalize(int start, int length) {
UNREACHABLE();
ReportUnexpectedCharacter(JsonToken::EOS);
return factory()->empty_string();
}
// Explicit instantiation.
......
......@@ -8,12 +8,13 @@
#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/objects.h"
#include "src/parsing/literal-buffer.h"
#include "src/zone/zone-containers.h"
namespace v8 {
namespace internal {
enum ParseElementResult { kElementFound, kElementNotFound, kNullHandle };
enum ParseElementResult { kElementFound, kElementNotFound };
class JsonParseInternalizer {
public:
......@@ -34,6 +35,24 @@ class JsonParseInternalizer {
Handle<JSReceiver> reviver_;
};
// Token kinds used by JsonParser. The parser keeps the current kind in
// next_ and exposes it through peek(); Expect()/Check() compare against it.
// NOTE(review): the character-to-token mapping is not part of this
// declaration; the names presumably follow the JSON grammar ('{' -> LBRACE,
// '[' -> LBRACK, ...) - confirm against the scanner tables.
enum class JsonToken : uint8_t {
NUMBER,
NEGATIVE_NUMBER,
STRING,
LBRACE,
RBRACE,
LBRACK,
RBRACK,
TRUE_LITERAL,
FALSE_LITERAL,
NULL_LITERAL,
WHITESPACE,
COLON,
COMMA,
// Default argument of ReportUnexpectedCharacter().
ILLEGAL,
// Passed to ReportUnexpectedCharacter() when the input ends unexpectedly.
EOS
};
// A simple json parser.
template <typename Char>
class JsonParser final {
......@@ -55,7 +74,12 @@ class JsonParser final {
static const int kEndOfString = -1;
private:
Handle<String> Internalize(int start, int length);
template <typename LiteralChar>
Handle<String> MakeString(bool requires_internalization,
const Vector<const LiteralChar>& chars);
Handle<String> MakeString(bool requires_internalization, int offset,
int length);
JsonParser(Isolate* isolate, Handle<String> source);
~JsonParser();
......@@ -63,38 +87,67 @@ class JsonParser final {
// Parse a string containing a single JSON value.
MaybeHandle<Object> ParseJson();
V8_INLINE void Advance();
void advance() { ++cursor_; }
Char NextCharacter();
V8_INLINE JsonToken peek() const { return next_; }
void Consume(JsonToken token) {
DCHECK_EQ(peek(), token);
advance();
}
// Consumes the current token if it is |token|; otherwise reports the token
// that was actually found as unexpected and leaves the cursor untouched.
void Expect(JsonToken token) {
  if (V8_UNLIKELY(peek() != token)) {
    ReportUnexpectedCharacter(peek());
    return;
  }
  advance();
}
void ExpectNext(JsonToken token) {
SkipWhitespace();
Expect(token);
}
// Skips whitespace, then consumes the current token only if it is |token|.
// Returns whether the token was consumed.
bool Check(JsonToken token) {
  SkipWhitespace();
  const bool matched = (next_ == token);
  if (matched) advance();
  return matched;
}
// Matches the remainder of the keyword literal |s| (N includes the trailing
// '\0') at the cursor. The caller has already compared the first character,
// so only s[1..N-2] is checked here. On success the cursor moves past the
// whole literal; on failure an error is reported and the cursor stays put.
template <size_t N>
void ScanLiteral(const char (&s)[N]) {
  // The literal must have at least two characters: one matched by the
  // caller and at least one left to compare.
  STATIC_ASSERT(N > 2);
  DCHECK(!is_at_end());

  const size_t remaining = static_cast<size_t>(end_ - cursor_);
  if (V8_UNLIKELY(remaining < N - 1)) {
    // Not enough input left to hold the literal.
    ReportUnexpectedCharacter(JsonToken::EOS);
    return;
  }

  if (V8_UNLIKELY(CompareChars(s + 1, cursor_ + 1, N - 2) != 0)) {
    ReportUnexpectedCharacter();
    return;
  }
  cursor_ += N - 1;
}
// The JSON lexical grammar is specified in the ECMAScript 5 standard,
// section 15.12.1.1. The only allowed whitespace characters between tokens
// are tab, carriage-return, newline and space.
V8_INLINE void AdvanceSkipWhitespace();
V8_INLINE void SkipWhitespace();
V8_INLINE uc32 AdvanceGetChar();
// Checks that current charater is c.
// If so, then consume c and skip whitespace.
V8_INLINE bool MatchSkipWhiteSpace(uc32 c);
void SkipWhitespace();
// A JSON string (production JSONString) is subset of valid JavaScript string
// literals. The string must only be double-quoted (not single-quoted), and
// the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
// four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
bool ParseJsonString(Handle<String> expected);
Handle<String> ParseJsonString() {
Handle<String> result = ScanJsonString();
if (result.is_null()) return result;
return factory()->InternalizeString(result);
}
Handle<String> ParseJsonString(bool requires_internalization,
Handle<String> expected = Handle<String>());
Handle<String> ScanJsonString();
// Creates a new string and copies prefix[start..end] into the beginning
// of it. Then scans the rest of the string, adding characters after the
// prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char.
template <typename StringType, typename SinkChar>
Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
// decimal number literals.
......@@ -102,7 +155,7 @@ class JsonParser final {
// digit before and after a decimal point, may not have prefixed zeros (unless
// the integer part is zero), and may include an exponent part (e.g., "e-10").
// Hexadecimal and octal numbers are not allowed.
Handle<Object> ParseJsonNumber();
Handle<Object> ParseJsonNumber(int sign, const Char* start);
// Parse a single JSON value from input (grammar production JSONValue).
// A JSON value is either a (double-quoted) string literal, a number literal,
......@@ -118,8 +171,9 @@ class JsonParser final {
Handle<Object> ParseJsonObject();
// Helper for ParseJsonObject. Parses the form "123": obj, which is recorded
// as an element, not a property.
ParseElementResult ParseElement(Handle<JSObject> json_object);
// as an element, not a property. Returns false if we should retry parsing the
// key as a non-element. (Returns true if it's an index or hits EOS).
bool ParseElement(Handle<JSObject> json_object);
// Parses a JSON array literal (grammar production JSONArray). An array
// literal is a square-bracketed and comma separated sequence (possibly empty)
......@@ -128,12 +182,8 @@ class JsonParser final {
// it allow a terminal comma, like a JavaScript array does.
Handle<Object> ParseJsonArray();
// Mark that a parsing error has happened at the current token, and
// return a null handle. Primarily for readability.
inline Handle<Object> ReportUnexpectedCharacter() {
return Handle<Object>::null();
}
// Mark that a parsing error has happened at the current character.
void ReportUnexpectedCharacter(JsonToken token = JsonToken::ILLEGAL);
inline Isolate* isolate() { return isolate_; }
inline Factory* factory() { return isolate_->factory(); }
......@@ -151,48 +201,58 @@ class JsonParser final {
DisallowHeapAllocation no_gc;
const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc);
if (chars_ != chars) {
size_t position = cursor_ - chars_;
size_t length = end_ - chars_;
chars_ = chars;
cursor_ = chars_ + position;
end_ = chars_ + length;
}
}
private:
static const bool kIsOneByte = sizeof(Char) == 1;
static const int kMaxInternalizedStringValueLength = 25;
Zone* zone() { return &zone_; }
// Casts |c| to uc32 avoiding LiteralBuffer::AddChar(char) in one-byte-strings
// with escapes that can result in two-byte strings.
void AddLiteralChar(uc32 c) { literal_buffer_.AddChar(c); }
void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map,
Vector<const Handle<Object>> properties);
const Vector<const Handle<Object>>& properties);
bool is_at_end() const {
DCHECK_LE(cursor_, end_);
return cursor_ == end_;
}
int position() const { return static_cast<int>(cursor_ - chars_); }
Isolate* isolate_;
Zone zone_;
const uint64_t hash_seed_;
AllocationType allocation_;
Handle<JSFunction> object_constructor_;
const Handle<String> original_source_;
Handle<String> source_;
int offset_;
int length_;
// Cached pointer to the raw chars in source. In case source is on-heap, we
// register an UpdatePointers callback. For this reason, chars_ should never
// be locally cached across a possible allocation. The scope in which we
// cache chars has to be guarded by a DisallowHeapAllocation scope.
// TODO(verwaest): Move chars_ and functions that operate over chars to a
// separate helper class that makes it clear that all functions need to be
// guarded.
// register an UpdatePointers callback. For this reason, chars_, cursor_ and
// end_ should never be locally cached across a possible allocation. The scope
// in which we cache chars has to be guarded by a DisallowHeapAllocation
// scope.
const Char* chars_;
const Char* cursor_;
const Char* end_;
uc32 c0_;
int position_;
JsonToken next_;
LiteralBuffer literal_buffer_;
// Indicates whether the bytes underneath source_ can relocate during GC.
bool chars_may_relocate_;
// Property handles are stored here inside ParseJsonObject.
ZoneVector<Handle<Object>> properties_;
};
template <>
Handle<String> JsonParser<uint8_t>::Internalize(int start, int length);
template <>
Handle<String> JsonParser<uint16_t>::Internalize(int start, int length);
// Explicit instantiation declarations.
extern template class JsonParser<uint8_t>;
extern template class JsonParser<uint16_t>;
......
......@@ -6477,23 +6477,21 @@ class RegExpKey : public HashTableKey {
Smi flags_;
};
Handle<String> OneByteStringKey::AsHandle(Isolate* isolate) {
return isolate->factory()->NewOneByteInternalizedString(string_, HashField());
}
Handle<String> TwoByteStringKey::AsHandle(Isolate* isolate) {
return isolate->factory()->NewTwoByteInternalizedString(string_, HashField());
}
Handle<String> SeqOneByteSubStringKey::AsHandle(Isolate* isolate) {
return isolate->factory()->NewOneByteInternalizedSubString(
string_, from_, length_, HashField());
}
bool SeqOneByteSubStringKey::IsMatch(Object string) {
bool SeqOneByteSubStringKey::IsMatch(Object object) {
DisallowHeapAllocation no_gc;
Vector<const uint8_t> chars(string_->GetChars(no_gc) + from_, length_);
return String::cast(string)->IsOneByteEqualTo(chars);
String string = String::cast(object);
if (string.length() != length_) return false;
if (string.IsOneByteRepresentation()) {
const uint8_t* data = string.GetChars<uint8_t>(no_gc);
return CompareChars(string_->GetChars(no_gc) + from_, data, length_) == 0;
}
const uint16_t* data = string.GetChars<uint16_t>(no_gc);
return CompareChars(string_->GetChars(no_gc) + from_, data, length_) == 0;
}
// InternalizedStringKey carries a string/internalized-string object as key.
......
......@@ -19,7 +19,7 @@
#include "src/objects/shared-function-info.h"
#include "src/objects/templates-inl.h"
#include "src/property.h"
#include "src/transitions.h"
#include "src/transitions-inl.h"
// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"
......
......@@ -195,29 +195,45 @@ Char FlatStringReader::Get(int index) {
}
template <typename Char>
class SequentialStringKey : public StringTableKey {
class SequentialStringKey final : public StringTableKey {
public:
explicit SequentialStringKey(Vector<const Char> string, uint64_t seed)
: StringTableKey(StringHasher::HashSequentialString<Char>(
string.begin(), string.length(), seed)),
string_(string) {}
SequentialStringKey(const Vector<const Char>& chars, uint64_t seed)
: SequentialStringKey(StringHasher::HashSequentialString<Char>(
chars.begin(), chars.length(), seed),
chars) {}
Vector<const Char> string_;
};
SequentialStringKey(int hash, const Vector<const Char>& chars)
: StringTableKey(hash), chars_(chars) {}
class OneByteStringKey : public SequentialStringKey<uint8_t> {
public:
OneByteStringKey(Vector<const uint8_t> str, uint64_t seed)
: SequentialStringKey<uint8_t>(str, seed) {}
bool IsMatch(Object other) override {
DisallowHeapAllocation no_gc;
String s = String::cast(other);
if (s.length() != chars_.length()) return false;
if (s->IsOneByteRepresentation()) {
const uint8_t* chars = s.GetChars<uint8_t>(no_gc);
return CompareChars(chars, chars_.begin(), chars_.length()) == 0;
}
const uint16_t* chars = s.GetChars<uint16_t>(no_gc);
return CompareChars(chars, chars_.begin(), chars_.length()) == 0;
}
bool IsMatch(Object string) override {
return String::cast(string)->IsOneByteEqualTo(string_);
Handle<String> AsHandle(Isolate* isolate) override {
if (sizeof(Char) == 1) {
return isolate->factory()->NewOneByteInternalizedString(
Vector<const uint8_t>::cast(chars_), HashField());
}
return isolate->factory()->NewTwoByteInternalizedString(
Vector<const uint16_t>::cast(chars_), HashField());
}
Handle<String> AsHandle(Isolate* isolate) override;
private:
Vector<const Char> chars_;
};
class SeqOneByteSubStringKey : public StringTableKey {
using OneByteStringKey = SequentialStringKey<uint8_t>;
using TwoByteStringKey = SequentialStringKey<uint16_t>;
class SeqOneByteSubStringKey final : public StringTableKey {
public:
// VS 2017 on official builds gives this spurious warning:
// warning C4789: buffer 'key' of size 16 bytes will be overrun; 4 bytes will
......@@ -253,18 +269,6 @@ class SeqOneByteSubStringKey : public StringTableKey {
int length_;
};
class TwoByteStringKey : public SequentialStringKey<uc16> {
public:
explicit TwoByteStringKey(Vector<const uc16> str, uint64_t seed)
: SequentialStringKey<uc16>(str, seed) {}
bool IsMatch(Object string) override {
return String::cast(string)->IsTwoByteEqualTo(string_);
}
Handle<String> AsHandle(Isolate* isolate) override;
};
bool String::Equals(String other) {
if (other == *this) return true;
if (this->IsInternalizedString() && other->IsInternalizedString()) {
......@@ -281,6 +285,13 @@ bool String::Equals(Isolate* isolate, Handle<String> one, Handle<String> two) {
return SlowEquals(isolate, one, two);
}
// Returns a pointer to the raw characters of this string, reading them from
// the external resource for external strings and from the string body
// otherwise. |no_gc| is forwarded to the non-external accessor.
template <typename Char>
const Char* String::GetChars(const DisallowHeapAllocation& no_gc) {
  if (StringShape(*this).IsExternal()) {
    return CharTraits<Char>::ExternalString::cast(*this).GetChars();
  }
  return CharTraits<Char>::String::cast(*this).GetChars(no_gc);
}
Handle<String> String::Flatten(Isolate* isolate, Handle<String> string,
AllocationType allocation) {
if (string->IsConsString()) {
......
......@@ -146,6 +146,10 @@ class String : public Name {
V8_INLINE Vector<const Char> GetCharVector(
const DisallowHeapAllocation& no_gc);
// Get chars from sequential or external strings.
template <typename Char>
inline const Char* GetChars(const DisallowHeapAllocation& no_gc);
// Get and set the length of the string.
inline int length() const;
inline void set_length(int value);
......
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/parsing/literal-buffer.h"
#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/memcopy.h"
namespace v8 {
namespace internal {
// Internalizes the buffered literal through the isolate's factory, choosing
// the one-byte or two-byte entry point to match the buffer's current width.
Handle<String> LiteralBuffer::Internalize(Isolate* isolate) const {
  if (!is_one_byte()) {
    return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
  }
  return isolate->factory()->InternalizeOneByteString(one_byte_literal());
}
// Computes the capacity to grow to from |min_capacity|: multiply by
// kGrowthFactor while that adds less than kMaxGrowth bytes, otherwise grow
// by a flat kMaxGrowth.
int LiteralBuffer::NewCapacity(int min_capacity) {
  const int multiplicative_limit = kMaxGrowth / (kGrowthFactor - 1);
  if (min_capacity >= multiplicative_limit) {
    return min_capacity + kMaxGrowth;
  }
  return min_capacity * kGrowthFactor;
}
void LiteralBuffer::ExpandBuffer() {
int min_capacity = Max(kInitialCapacity, backing_store_.length());
Vector<byte> new_store = Vector<byte>::New(NewCapacity(min_capacity));
if (position_ > 0) {
MemCopy(new_store.begin(), backing_store_.begin(), position_);
}
backing_store_.Dispose();
backing_store_ = new_store;
}
void LiteralBuffer::ConvertToTwoByte() {
DCHECK(is_one_byte());
Vector<byte> new_store;
int new_content_size = position_ * kUC16Size;
if (new_content_size >= backing_store_.length()) {
// Ensure room for all currently read code units as UC16 as well
// as the code unit about to be stored.
new_store = Vector<byte>::New(NewCapacity(new_content_size));
} else {
new_store = backing_store_;
}
uint8_t* src = backing_store_.begin();
uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.begin());
for (int i = position_ - 1; i >= 0; i--) {
dst[i] = src[i];
}
if (new_store.begin() != backing_store_.begin()) {
backing_store_.Dispose();
backing_store_ = new_store;
}
position_ = new_content_size;
is_one_byte_ = false;
}
// Appends |code_unit| to a buffer that is already in two-byte mode. Values
// above Utf16::kMaxNonSurrogateCharCode are stored as a lead/trail surrogate
// pair; everything else is stored as a single UC16 unit.
void LiteralBuffer::AddTwoByteChar(uc32 code_unit) {
  DCHECK(!is_one_byte());

  // Writes one UC16 unit at position_, growing the store first when it is
  // full. backing_store_ is re-read on every call because growing replaces
  // it.
  auto append_unit = [this](uint16_t unit) {
    if (position_ >= backing_store_.length()) ExpandBuffer();
    *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = unit;
    position_ += kUC16Size;
  };

  const bool needs_surrogate_pair =
      code_unit > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode);
  if (needs_surrogate_pair) {
    append_unit(unibrow::Utf16::LeadSurrogate(code_unit));
    append_unit(unibrow::Utf16::TrailSurrogate(code_unit));
  } else {
    append_unit(static_cast<uint16_t>(code_unit));
  }
}
} // namespace internal
} // namespace v8
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_PARSING_LITERAL_BUFFER_H_
#define V8_PARSING_LITERAL_BUFFER_H_
#include "src/unicode-decoder.h"
#include "src/vector.h"
namespace v8 {
namespace internal {
// LiteralBuffer - Collector of chars of literals.
//
// Code units are appended to a growable byte store. The buffer starts in
// one-byte mode and switches to two-byte (UC16) storage the first time a
// code unit above Latin1 is added through AddChar(uc32). position_ is a byte
// offset into the store, not a character count; use length() for the number
// of code units. Start() resets the buffer for the next literal and keeps
// the allocated store.
class LiteralBuffer final {
 public:
  LiteralBuffer() : backing_store_(), position_(0), is_one_byte_(true) {}

  ~LiteralBuffer() { backing_store_.Dispose(); }

  // Appends an ASCII character. The buffer must still be in one-byte mode
  // (checked by AddOneByteChar).
  V8_INLINE void AddChar(char code_unit) {
    DCHECK(IsValidAscii(code_unit));
    AddOneByteChar(static_cast<byte>(code_unit));
  }

  // Appends an arbitrary code unit, converting the buffer to two-byte
  // storage first if it is one-byte and |code_unit| does not fit in Latin1.
  V8_INLINE void AddChar(uc32 code_unit) {
    if (is_one_byte()) {
      if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
        AddOneByteChar(static_cast<byte>(code_unit));
        return;
      }
      ConvertToTwoByte();
    }
    AddTwoByteChar(code_unit);
  }

  bool is_one_byte() const { return is_one_byte_; }

  // Returns true if the buffer is one-byte and its contents are exactly
  // |keyword|.
  bool Equals(Vector<const char> keyword) const {
    return is_one_byte() && keyword.length() == position_ &&
           (memcmp(keyword.begin(), backing_store_.begin(), position_) == 0);
  }

  // Views over the buffered contents. Each one DCHECKs that the requested
  // width matches the buffer's current mode.
  Vector<const uint16_t> two_byte_literal() const {
    return literal<uint16_t>();
  }

  Vector<const uint8_t> one_byte_literal() const { return literal<uint8_t>(); }

  template <typename Char>
  Vector<const Char> literal() const {
    DCHECK_EQ(is_one_byte_, sizeof(Char) == 1);
    DCHECK_EQ(position_ & (sizeof(Char) - 1), 0);
    // sizeof(Char) - 1 is 0 for one-byte and 1 for two-byte characters, so
    // the shift turns the byte offset into a character count.
    return Vector<const Char>(
        reinterpret_cast<const Char*>(backing_store_.begin()),
        position_ >> (sizeof(Char) - 1));
  }

  // Number of code units currently buffered.
  int length() const { return is_one_byte() ? position_ : (position_ >> 1); }

  // Resets the buffer to empty, one-byte mode. The backing store is kept.
  void Start() {
    position_ = 0;
    is_one_byte_ = true;
  }

  Handle<String> Internalize(Isolate* isolate) const;

 private:
  static const int kInitialCapacity = 16;
  static const int kGrowthFactor = 4;
  static const int kMaxGrowth = 1 * MB;

  inline bool IsValidAscii(char code_unit) {
    // Control characters and printable characters span the range of
    // valid ASCII characters (0-127). Chars are unsigned on some
    // platforms which causes compiler warnings if the validity check
    // tests the lower bound >= 0 as it's always true.
    //
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF), so go through unsigned char: where char is
    // signed, passing a negative value directly is undefined behavior.
    const int value = static_cast<unsigned char>(code_unit);
    return iscntrl(value) || isprint(value);
  }

  V8_INLINE void AddOneByteChar(byte one_byte_char) {
    DCHECK(is_one_byte());
    if (position_ >= backing_store_.length()) ExpandBuffer();
    backing_store_[position_] = one_byte_char;
    position_ += kOneByteSize;
  }

  void AddTwoByteChar(uc32 code_unit);
  int NewCapacity(int min_capacity);
  void ExpandBuffer();
  void ConvertToTwoByte();

  Vector<byte> backing_store_;
  // Byte offset of the next write into backing_store_.
  int position_;
  bool is_one_byte_;

  DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
};
} // namespace internal
} // namespace v8
#endif // V8_PARSING_LITERAL_BUFFER_H_
......@@ -54,74 +54,6 @@ class Scanner::ErrorState {
Scanner::Location const old_location_;
};
// ----------------------------------------------------------------------------
// Scanner::LiteralBuffer
// Passes the collected literal to the factory's Internalize*String entry point
// that matches the buffer's current encoding and returns the resulting handle.
Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
  auto* factory = isolate->factory();
  if (!is_one_byte()) {
    return factory->InternalizeTwoByteString(two_byte_literal());
  }
  return factory->InternalizeOneByteString(one_byte_literal());
}
// Picks the size of the next backing store for a buffer that currently needs
// |min_capacity| bytes. Small buffers are multiplied by kGrowthFactor; from
// the threshold below onwards the buffer grows by exactly kMaxGrowth, so a
// single step never adds more than kMaxGrowth.
int Scanner::LiteralBuffer::NewCapacity(int min_capacity) {
  const int linear_growth_threshold = kMaxGrowth / (kGrowthFactor - 1);
  if (min_capacity >= linear_growth_threshold) {
    return min_capacity + kMaxGrowth;
  }
  return min_capacity * kGrowthFactor;
}
void Scanner::LiteralBuffer::ExpandBuffer() {
int min_capacity = Max(kInitialCapacity, backing_store_.length());
Vector<byte> new_store = Vector<byte>::New(NewCapacity(min_capacity));
if (position_ > 0) {
MemCopy(new_store.begin(), backing_store_.begin(), position_);
}
backing_store_.Dispose();
backing_store_ = new_store;
}
void Scanner::LiteralBuffer::ConvertToTwoByte() {
DCHECK(is_one_byte());
Vector<byte> new_store;
int new_content_size = position_ * kUC16Size;
if (new_content_size >= backing_store_.length()) {
// Ensure room for all currently read code units as UC16 as well
// as the code unit about to be stored.
new_store = Vector<byte>::New(NewCapacity(new_content_size));
} else {
new_store = backing_store_;
}
uint8_t* src = backing_store_.begin();
uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.begin());
for (int i = position_ - 1; i >= 0; i--) {
dst[i] = src[i];
}
if (new_store.begin() != backing_store_.begin()) {
backing_store_.Dispose();
backing_store_ = new_store;
}
position_ = new_content_size;
is_one_byte_ = false;
}
// Appends |code_unit| to a buffer that is already in two-byte mode. Values up
// to Utf16::kMaxNonSurrogateCharCode take one 16-bit slot; larger values are
// written as a lead surrogate followed by a trail surrogate.
void Scanner::LiteralBuffer::AddTwoByteChar(uc32 code_unit) {
  DCHECK(!is_one_byte());
  if (position_ >= backing_store_.length()) ExpandBuffer();

  const uc32 max_single_unit =
      static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode);
  if (code_unit > max_single_unit) {
    uint16_t* lead_slot =
        reinterpret_cast<uint16_t*>(&backing_store_[position_]);
    *lead_slot = unibrow::Utf16::LeadSurrogate(code_unit);
    position_ += kUC16Size;

    // The lead surrogate may have filled the store; re-check before writing
    // the trail surrogate.
    if (position_ >= backing_store_.length()) ExpandBuffer();
    uint16_t* trail_slot =
        reinterpret_cast<uint16_t*>(&backing_store_[position_]);
    *trail_slot = unibrow::Utf16::TrailSurrogate(code_unit);
    position_ += kUC16Size;
    return;
  }

  uint16_t* slot = reinterpret_cast<uint16_t*>(&backing_store_[position_]);
  *slot = code_unit;
  position_ += kUC16Size;
}
// ----------------------------------------------------------------------------
// Scanner::BookmarkScope
......
......@@ -14,6 +14,7 @@
#include "src/char-predicates.h"
#include "src/globals.h"
#include "src/message-template.h"
#include "src/parsing/literal-buffer.h"
#include "src/parsing/token.h"
#include "src/pointer-with-payload.h"
#include "src/unicode.h"
......@@ -423,92 +424,6 @@ class V8_EXPORT_PRIVATE Scanner {
// escape sequences are allowed.
class ErrorState;
// LiteralBuffer - Collector of chars of literals.
//
// Code units are appended to a byte-addressed backing store. A buffer starts
// out in one-byte mode and is switched to two-byte mode the first time a code
// unit above unibrow::Latin1::kMaxChar is added through AddChar(uc32).
class LiteralBuffer {
 public:
  LiteralBuffer() : backing_store_(), position_(0), is_one_byte_(true) {}

  ~LiteralBuffer() { backing_store_.Dispose(); }

  // Appends a single ASCII character. The buffer must still be in one-byte
  // mode (debug-checked inside AddOneByteChar).
  V8_INLINE void AddChar(char code_unit) {
    DCHECK(IsValidAscii(code_unit));
    AddOneByteChar(static_cast<byte>(code_unit));
  }

  // Appends |code_unit|. While the buffer is one-byte, values up to
  // Latin1::kMaxChar are stored as a single byte; the first larger value
  // converts the buffer to two-byte mode before it is stored.
  V8_INLINE void AddChar(uc32 code_unit) {
    if (is_one_byte()) {
      if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
        AddOneByteChar(static_cast<byte>(code_unit));
        return;
      }
      ConvertToTwoByte();
    }
    AddTwoByteChar(code_unit);
  }

  bool is_one_byte() const { return is_one_byte_; }

  // Returns true if the buffer is in one-byte mode and its contents are
  // byte-for-byte identical to |keyword|.
  bool Equals(Vector<const char> keyword) const {
    return is_one_byte() && keyword.length() == position_ &&
           (memcmp(keyword.begin(), backing_store_.begin(), position_) == 0);
  }

  // View of the collected 16-bit code units; only valid in two-byte mode.
  // |position_| is a byte offset, hence the halving.
  Vector<const uint16_t> two_byte_literal() const {
    DCHECK(!is_one_byte());
    DCHECK_EQ(position_ & 0x1, 0);
    return Vector<const uint16_t>(
        reinterpret_cast<const uint16_t*>(backing_store_.begin()),
        position_ >> 1);
  }

  // View of the collected bytes; only valid in one-byte mode.
  Vector<const uint8_t> one_byte_literal() const {
    DCHECK(is_one_byte());
    return Vector<const uint8_t>(
        reinterpret_cast<const uint8_t*>(backing_store_.begin()), position_);
  }

  // Number of code units collected so far (|position_| counts bytes).
  int length() const { return is_one_byte() ? position_ : (position_ >> 1); }

  // Resets the buffer to an empty one-byte literal. The backing store is not
  // touched here.
  void Start() {
    position_ = 0;
    is_one_byte_ = true;
  }

  Handle<String> Internalize(Isolate* isolate) const;

 private:
  static const int kInitialCapacity = 16;
  static const int kGrowthFactor = 4;
  static const int kMaxGrowth = 1 * MB;

  inline bool IsValidAscii(char code_unit) {
    // Control characters and printable characters span the range of
    // valid ASCII characters (0-127). Chars are unsigned on some
    // platforms which causes compiler warnings if the validity check
    // tests the lower bound >= 0 as it's always true.
    //
    // The <cctype> classifiers have undefined behavior for arguments that are
    // neither EOF nor representable as unsigned char, which is what a negative
    // plain char would be on signed-char platforms, so convert first.
    const unsigned char unsigned_unit = static_cast<unsigned char>(code_unit);
    return iscntrl(unsigned_unit) || isprint(unsigned_unit);
  }

  // Stores one byte at the current position, growing the backing store first
  // when it is full.
  V8_INLINE void AddOneByteChar(byte one_byte_char) {
    DCHECK(is_one_byte());
    if (position_ >= backing_store_.length()) ExpandBuffer();
    backing_store_[position_] = one_byte_char;
    position_ += kOneByteSize;
  }

  void AddTwoByteChar(uc32 code_unit);
  int NewCapacity(int min_capacity);
  void ExpandBuffer();
  void ConvertToTwoByte();

  Vector<byte> backing_store_;
  int position_;  // Write offset into |backing_store_|, in bytes.
  bool is_one_byte_;

  DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
};
// The current and look-ahead token.
struct TokenDesc {
Location location = {0, 0};
......
......@@ -176,6 +176,49 @@ int TransitionArray::SearchName(Name name, int* out_insertion_index) {
out_insertion_index);
}
// Builds an accessor over a raw |map|. |no_gc| is never read here; it is only
// marked as used. NOTE(review): presumably the parameter exists so callers
// have to hold a DisallowHeapAllocation scope while the raw map is in use --
// confirm against the call sites.
TransitionsAccessor::TransitionsAccessor(Isolate* isolate, Map map,
                                         DisallowHeapAllocation* no_gc)
    : isolate_(isolate), map_(map) {
  USE(no_gc);
  Initialize();
}
// Builds an accessor from a handlified |map|. The handle is kept in
// |map_handle_| (Reload() re-reads the map through it) and the map it
// currently refers to is cached in |map_| before the transitions state is
// derived by Initialize().
TransitionsAccessor::TransitionsAccessor(Isolate* isolate, Handle<Map> map)
: isolate_(isolate), map_handle_(map), map_(*map) {
Initialize();
}
// Re-reads the map through |map_handle_| and re-derives the cached
// transitions state. Requires |map_handle_| to be non-null (debug-checked).
void TransitionsAccessor::Reload() {
DCHECK(!map_handle_.is_null());
// Refresh the cached raw map from the handle before re-initializing.
map_ = *map_handle_;
Initialize();
}
void TransitionsAccessor::Initialize() {
raw_transitions_ = map_->raw_transitions();
HeapObject heap_object;
if (raw_transitions_->IsSmi() || raw_transitions_->IsCleared()) {
encoding_ = kUninitialized;
} else if (raw_transitions_->IsWeak()) {
encoding_ = kWeakRef;
} else if (raw_transitions_->GetHeapObjectIfStrong(&heap_object)) {
if (heap_object->IsTransitionArray()) {
encoding_ = kFullTransitionArray;
} else if (heap_object->IsPrototypeInfo()) {
encoding_ = kPrototypeInfo;
} else {
DCHECK(map_->is_deprecated());
DCHECK(heap_object->IsMap());
encoding_ = kMigrationTarget;
}
} else {
UNREACHABLE();
}
#if DEBUG
needs_reload_ = false;
#endif
}
int TransitionArray::number_of_transitions() const {
if (length() < kFirstIndex) return 0;
return Get(kTransitionLengthIndex).ToSmi().value();
......@@ -243,6 +286,33 @@ void TransitionArray::SetNumberOfTransitions(int number_of_transitions) {
MaybeObject::FromSmi(Smi::FromInt(number_of_transitions)));
}
// Returns the key of the single weak-ref transition when that transition adds
// a data field with attributes NONE and a String name; returns a null handle
// in every other case, including all non-weak-ref encodings.
Handle<String> TransitionsAccessor::ExpectedTransitionKey() {
  DisallowHeapAllocation no_gc;
  switch (encoding()) {
    case kWeakRef: {
      Map simple_target =
          Map::cast(raw_transitions_->GetHeapObjectAssumeWeak());
      PropertyDetails target_details = GetSimpleTargetDetails(simple_target);
      if (target_details.location() != kField) {
        return Handle<String>::null();
      }
      DCHECK_EQ(kData, target_details.kind());
      if (target_details.attributes() != NONE) {
        return Handle<String>::null();
      }
      Name key = GetSimpleTransitionKey(simple_target);
      if (!key->IsString()) {
        return Handle<String>::null();
      }
      return handle(String::cast(key), isolate_);
    }
    case kFullTransitionArray:
    case kMigrationTarget:
    case kUninitialized:
    case kPrototypeInfo:
      return Handle<String>::null();
  }
  UNREACHABLE();
}
// Returns a handle to transition target 0. Callers must only use this when
// ExpectedTransitionKey() yields a non-null key (debug-checked).
Handle<Map> TransitionsAccessor::ExpectedTransitionTarget() {
  DCHECK(!ExpectedTransitionKey().is_null());
  Map expected_target = GetTarget(0);
  return handle(expected_target, isolate_);
}
} // namespace internal
} // namespace v8
......
......@@ -11,31 +11,6 @@
namespace v8 {
namespace internal {
void TransitionsAccessor::Initialize() {
raw_transitions_ = map_->raw_transitions();
HeapObject heap_object;
if (raw_transitions_->IsSmi() || raw_transitions_->IsCleared()) {
encoding_ = kUninitialized;
} else if (raw_transitions_->IsWeak()) {
encoding_ = kWeakRef;
} else if (raw_transitions_->GetHeapObjectIfStrong(&heap_object)) {
if (heap_object->IsTransitionArray()) {
encoding_ = kFullTransitionArray;
} else if (heap_object->IsPrototypeInfo()) {
encoding_ = kPrototypeInfo;
} else {
DCHECK(map_->is_deprecated());
DCHECK(heap_object->IsMap());
encoding_ = kMigrationTarget;
}
} else {
UNREACHABLE();
}
#if DEBUG
needs_reload_ = false;
#endif
}
Map TransitionsAccessor::GetSimpleTransition() {
switch (encoding()) {
case kWeakRef:
......@@ -262,33 +237,6 @@ MaybeHandle<Map> TransitionsAccessor::FindTransitionToDataProperty(
return Handle<Map>(target, isolate_);
}
// Returns the key of the single weak-ref transition when that transition adds
// a data field with attributes NONE and a String name; returns a null handle
// in every other case, including all non-weak-ref encodings.
Handle<String> TransitionsAccessor::ExpectedTransitionKey() {
  DisallowHeapAllocation no_gc;
  switch (encoding()) {
    case kWeakRef: {
      Map simple_target =
          Map::cast(raw_transitions_->GetHeapObjectAssumeWeak());
      PropertyDetails target_details = GetSimpleTargetDetails(simple_target);
      if (target_details.location() != kField) {
        return Handle<String>::null();
      }
      DCHECK_EQ(kData, target_details.kind());
      if (target_details.attributes() != NONE) {
        return Handle<String>::null();
      }
      Name key = GetSimpleTransitionKey(simple_target);
      if (!key->IsString()) {
        return Handle<String>::null();
      }
      return handle(String::cast(key), isolate_);
    }
    case kFullTransitionArray:
    case kMigrationTarget:
    case kUninitialized:
    case kPrototypeInfo:
      return Handle<String>::null();
  }
  UNREACHABLE();
}
// Returns a handle to transition target 0. Callers must only use this when
// ExpectedTransitionKey() yields a non-null key (debug-checked).
Handle<Map> TransitionsAccessor::ExpectedTransitionTarget() {
  DCHECK(!ExpectedTransitionKey().is_null());
  Map expected_target = GetTarget(0);
  return handle(expected_target, isolate_);
}
bool TransitionsAccessor::CanHaveMoreTransitions() {
if (map_->is_dictionary_map()) return false;
if (encoding() == kFullTransitionArray) {
......
......@@ -38,16 +38,9 @@ namespace internal {
// cleared when the map they refer to is not otherwise reachable.
class V8_EXPORT_PRIVATE TransitionsAccessor {
public:
TransitionsAccessor(Isolate* isolate, Map map, DisallowHeapAllocation* no_gc)
: isolate_(isolate), map_(map) {
Initialize();
USE(no_gc);
}
TransitionsAccessor(Isolate* isolate, Handle<Map> map)
: isolate_(isolate), map_handle_(map), map_(*map) {
Initialize();
}
inline TransitionsAccessor(Isolate* isolate, Map map,
DisallowHeapAllocation* no_gc);
inline TransitionsAccessor(Isolate* isolate, Handle<Map> map);
// Insert a new transition into |map|'s transition array, extending it
// as necessary.
// Requires the constructor that takes a Handle<Map> to have been used.
......@@ -70,8 +63,8 @@ class V8_EXPORT_PRIVATE TransitionsAccessor {
return FindTransitionToDataProperty(name, kFieldOnly);
}
Handle<String> ExpectedTransitionKey();
Handle<Map> ExpectedTransitionTarget();
inline Handle<String> ExpectedTransitionKey();
inline Handle<Map> ExpectedTransitionTarget();
int NumberOfTransitions();
// The size of transition arrays are limited so they do not end up in large
......@@ -143,11 +136,7 @@ class V8_EXPORT_PRIVATE TransitionsAccessor {
kFullTransitionArray,
};
void Reload() {
DCHECK(!map_handle_.is_null());
map_ = *map_handle_;
Initialize();
}
inline void Reload();
inline Encoding encoding() {
DCHECK(!needs_reload_);
......@@ -170,7 +159,7 @@ class V8_EXPORT_PRIVATE TransitionsAccessor {
#endif
}
void Initialize();
inline void Initialize();
inline Map GetSimpleTransition();
bool HasSimpleTransitionTo(Map map);
......
......@@ -133,7 +133,7 @@ class Utf16 {
class Latin1 {
public:
static const unsigned kMaxChar = 0xff;
static const uint16_t kMaxChar = 0xff;
// Convert the character to Latin-1 case equivalent if possible.
static inline uint16_t TryConvertToLatin1(uint16_t c) {
switch (c) {
......
......@@ -24,7 +24,7 @@
#include "src/objects/ordered-hash-table-inl.h"
#include "src/objects/smi.h"
#include "src/snapshot/code-serializer.h"
#include "src/transitions.h"
#include "src/transitions-inl.h"
#include "src/wasm/wasm-engine.h"
#include "src/wasm/wasm-objects-inl.h"
#include "src/wasm/wasm-result.h"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment