Commit de8aaef5 authored by Toon Verwaest, committed by Commit Bot

[json] Speed up json parsing

- scan using raw data pointers + GC callback
- scan using scanner tables
- cap internalizing large string values
- inline fast transitioning logic

Change-Id: I545620017b38b80e4193dfaf19381411adf5ff89
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1584320
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61132}
parent 403cac98
......@@ -2571,6 +2571,8 @@ v8_source_set("v8_base_without_compiler") {
"src/parsing/expression-scope.h",
"src/parsing/func-name-inferrer.cc",
"src/parsing/func-name-inferrer.h",
"src/parsing/literal-buffer.cc",
"src/parsing/literal-buffer.h",
"src/parsing/parse-info.cc",
"src/parsing/parse-info.h",
"src/parsing/parser-base.h",
......
......@@ -54,37 +54,16 @@ class OneByteStringStream {
} // namespace
// String-table key that wraps an AstRawString. The hash field passed to the
// StringTableKey base is taken from the wrapped string.
class AstRawStringInternalizationKey : public StringTableKey {
 public:
  explicit AstRawStringInternalizationKey(const AstRawString* string)
      : StringTableKey(string->hash_field()), string_(string) {}

  // Compares |other| (cast to String) against the literal bytes, delegating
  // to IsTwoByteEqualTo or IsOneByteEqualTo depending on the literal's width.
  bool IsMatch(Object other) override {
    if (!string_->is_one_byte()) {
      return String::cast(other)->IsTwoByteEqualTo(
          Vector<const uint16_t>::cast(string_->literal_bytes_));
    }
    return String::cast(other)->IsOneByteEqualTo(string_->literal_bytes_);
  }

  // Builds the internalized heap string for the literal through the factory,
  // reusing the hash field already stored on the AstRawString.
  Handle<String> AsHandle(Isolate* isolate) override {
    auto* factory = isolate->factory();
    if (!string_->is_one_byte()) {
      return factory->NewTwoByteInternalizedString(
          Vector<const uint16_t>::cast(string_->literal_bytes_),
          string_->hash_field());
    }
    return factory->NewOneByteInternalizedString(string_->literal_bytes_,
                                                 string_->hash_field());
  }

 private:
  const AstRawString* string_;
};
// Resolves this raw string to a heap string and records it via set_string().
// Must not be called once a string has been set (checked in debug builds).
//
// - An empty literal maps to the factory's empty_string().
// - Otherwise a OneByteStringKey or TwoByteStringKey is built from the
//   already-computed hash_field_ and the literal bytes, and looked up in the
//   StringTable.
//
// Exactly one |key| is declared per branch; the earlier
// AstRawStringInternalizationKey declaration in the two-byte branch was a
// leftover that redeclared |key| in the same scope.
void AstRawString::Internalize(Isolate* isolate) {
  DCHECK(!has_string_);
  if (literal_bytes_.length() == 0) {
    set_string(isolate->factory()->empty_string());
  } else if (is_one_byte()) {
    OneByteStringKey key(hash_field_, literal_bytes_);
    set_string(StringTable::LookupKey(isolate, &key));
  } else {
    TwoByteStringKey key(hash_field_,
                         Vector<const uint16_t>::cast(literal_bytes_));
    set_string(StringTable::LookupKey(isolate, &key));
  }
}
......
......@@ -685,7 +685,8 @@ Handle<AccessorPair> Factory::NewAccessorPair() {
}
// Internalized strings are created in the old generation (data space).
Handle<String> Factory::InternalizeUtf8String(Vector<const char> string) {
Handle<String> Factory::InternalizeUtf8String(
const Vector<const char>& string) {
Vector<const uint8_t> utf8_data = Vector<const uint8_t>::cast(string);
Utf8Decoder decoder(utf8_data);
if (decoder.is_ascii()) return InternalizeOneByteString(utf8_data);
......@@ -701,7 +702,8 @@ Handle<String> Factory::InternalizeUtf8String(Vector<const char> string) {
Vector<const uc16>(buffer.get(), decoder.utf16_length()));
}
// Internalizes a one-byte character vector: builds a OneByteStringKey hashed
// with this isolate's hash seed and resolves it through
// InternalizeStringWithKey(). The by-value signature line that used to precede
// this definition was a stale duplicate and is dropped.
Handle<String> Factory::InternalizeOneByteString(
    const Vector<const uint8_t>& string) {
  OneByteStringKey key(string, HashSeed(isolate()));
  return InternalizeStringWithKey(&key);
}
......@@ -712,7 +714,8 @@ Handle<String> Factory::InternalizeOneByteString(
return InternalizeStringWithKey(&key);
}
// Internalizes a two-byte character vector: builds a TwoByteStringKey hashed
// with this isolate's hash seed and resolves it through
// InternalizeStringWithKey(). The by-value signature line that used to precede
// this definition was a stale duplicate and is dropped.
Handle<String> Factory::InternalizeTwoByteString(
    const Vector<const uc16>& string) {
  TwoByteStringKey key(string, HashSeed(isolate()));
  return InternalizeStringWithKey(&key);
}
......@@ -722,8 +725,8 @@ Handle<String> Factory::InternalizeStringWithKey(StringTableKey* key) {
return StringTable::LookupKey(isolate(), key);
}
MaybeHandle<String> Factory::NewStringFromOneByte(Vector<const uint8_t> string,
AllocationType allocation) {
MaybeHandle<String> Factory::NewStringFromOneByte(
const Vector<const uint8_t>& string, AllocationType allocation) {
DCHECK_NE(allocation, AllocationType::kReadOnly);
int length = string.length();
if (length == 0) return empty_string();
......@@ -740,9 +743,9 @@ MaybeHandle<String> Factory::NewStringFromOneByte(Vector<const uint8_t> string,
return result;
}
MaybeHandle<String> Factory::NewStringFromUtf8(Vector<const char> data,
MaybeHandle<String> Factory::NewStringFromUtf8(const Vector<const char>& string,
AllocationType allocation) {
Vector<const uint8_t> utf8_data = Vector<const uint8_t>::cast(data);
Vector<const uint8_t> utf8_data = Vector<const uint8_t>::cast(string);
Utf8Decoder decoder(utf8_data);
if (decoder.utf16_length() == 0) return empty_string();
......@@ -846,8 +849,8 @@ MaybeHandle<String> Factory::NewStringFromTwoByte(const uc16* string,
}
}
// Vector convenience overload: forwards the vector's begin pointer and length
// to the (const uc16*, int, AllocationType) overload. The by-value signature
// lines that used to precede this definition were a stale duplicate and are
// dropped.
MaybeHandle<String> Factory::NewStringFromTwoByte(
    const Vector<const uc16>& string, AllocationType allocation) {
  return NewStringFromTwoByte(string.begin(), string.length(), allocation);
}
......@@ -899,7 +902,7 @@ Handle<SeqOneByteString> Factory::AllocateRawOneByteInternalizedString(
}
Handle<String> Factory::AllocateTwoByteInternalizedString(
Vector<const uc16> str, uint32_t hash_field) {
const Vector<const uc16>& str, uint32_t hash_field) {
CHECK_GE(String::kMaxLength, str.length());
DCHECK_NE(0, str.length()); // Use Heap::empty_string() instead.
......@@ -958,8 +961,8 @@ Handle<String> Factory::AllocateInternalizedStringImpl(T t, int chars,
return answer;
}
Handle<String> Factory::NewOneByteInternalizedString(Vector<const uint8_t> str,
uint32_t hash_field) {
Handle<String> Factory::NewOneByteInternalizedString(
const Vector<const uint8_t>& str, uint32_t hash_field) {
Handle<SeqOneByteString> result =
AllocateRawOneByteInternalizedString(str.length(), hash_field);
DisallowHeapAllocation no_allocation;
......@@ -978,8 +981,8 @@ Handle<String> Factory::NewOneByteInternalizedSubString(
return result;
}
// Creates a two-byte internalized string with the given hash field by
// delegating to AllocateTwoByteInternalizedString(). The by-value signature
// lines that used to precede this definition were a stale duplicate and are
// dropped.
Handle<String> Factory::NewTwoByteInternalizedString(
    const Vector<const uc16>& str, uint32_t hash_field) {
  return AllocateTwoByteInternalizedString(str, hash_field);
}
......
......@@ -236,16 +236,16 @@ class V8_EXPORT_PRIVATE Factory {
// Finds the internalized copy for string in the string table.
// If not found, a new string is added to the table and returned.
Handle<String> InternalizeUtf8String(Vector<const char> str);
Handle<String> InternalizeUtf8String(const Vector<const char>& str);
Handle<String> InternalizeUtf8String(const char* str) {
return InternalizeUtf8String(CStrVector(str));
}
Handle<String> InternalizeOneByteString(Vector<const uint8_t> str);
Handle<String> InternalizeOneByteString(const Vector<const uint8_t>& str);
Handle<String> InternalizeOneByteString(Handle<SeqOneByteString>, int from,
int length);
Handle<String> InternalizeTwoByteString(Vector<const uc16> str);
Handle<String> InternalizeTwoByteString(const Vector<const uc16>& str);
template <class StringTableKey>
Handle<String> InternalizeStringWithKey(StringTableKey* key);
......@@ -276,7 +276,7 @@ class V8_EXPORT_PRIVATE Factory {
//
// One-byte strings are pretenured when used as keys in the SourceCodeCache.
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromOneByte(
Vector<const uint8_t> str,
const Vector<const uint8_t>& str,
AllocationType allocation = AllocationType::kYoung);
template <size_t N>
......@@ -297,7 +297,7 @@ class V8_EXPORT_PRIVATE Factory {
// UTF8 strings are pretenured when used for regexp literal patterns and
// flags in the parser.
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromUtf8(
Vector<const char> str,
const Vector<const char>& str,
AllocationType allocation = AllocationType::kYoung);
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromUtf8SubString(
......@@ -305,7 +305,7 @@ class V8_EXPORT_PRIVATE Factory {
AllocationType allocation = AllocationType::kYoung);
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromTwoByte(
Vector<const uc16> str,
const Vector<const uc16>& str,
AllocationType allocation = AllocationType::kYoung);
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromTwoByte(
......@@ -314,14 +314,14 @@ class V8_EXPORT_PRIVATE Factory {
Handle<JSStringIterator> NewJSStringIterator(Handle<String> string);
Handle<String> NewOneByteInternalizedString(Vector<const uint8_t> str,
Handle<String> NewOneByteInternalizedString(const Vector<const uint8_t>& str,
uint32_t hash_field);
Handle<String> NewOneByteInternalizedSubString(
Handle<SeqOneByteString> string, int offset, int length,
uint32_t hash_field);
Handle<String> NewTwoByteInternalizedString(Vector<const uc16> str,
Handle<String> NewTwoByteInternalizedString(const Vector<const uc16>& str,
uint32_t hash_field);
Handle<String> NewInternalizedStringImpl(Handle<String> string, int chars,
......@@ -1050,8 +1050,8 @@ class V8_EXPORT_PRIVATE Factory {
Handle<SeqOneByteString> AllocateRawOneByteInternalizedString(
int length, uint32_t hash_field);
Handle<String> AllocateTwoByteInternalizedString(Vector<const uc16> str,
uint32_t hash_field);
Handle<String> AllocateTwoByteInternalizedString(
const Vector<const uc16>& str, uint32_t hash_field);
MaybeHandle<String> NewStringFromTwoByte(const uc16* string, int length,
AllocationType allocation);
......
This diff is collapsed.
......@@ -8,12 +8,13 @@
#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/objects.h"
#include "src/parsing/literal-buffer.h"
#include "src/zone/zone-containers.h"
namespace v8 {
namespace internal {
enum ParseElementResult { kElementFound, kElementNotFound, kNullHandle };
enum ParseElementResult { kElementFound, kElementNotFound };
class JsonParseInternalizer {
public:
......@@ -34,6 +35,24 @@ class JsonParseInternalizer {
Handle<JSReceiver> reviver_;
};
// Token kinds used by JsonParser. JsonParser::peek() returns the current
// JsonToken, and Consume()/Expect()/Check() compare their argument against it.
// The underlying type is uint8_t.
//
// ILLEGAL is the default argument of ReportUnexpectedCharacter(); EOS is what
// ScanLiteral() reports when fewer characters remain than the literal needs.
// NOTE(review): the mapping from input characters to the remaining
// enumerators is not visible in this excerpt — confirm against the scanner
// table before relying on the names alone.
enum class JsonToken : uint8_t {
NUMBER,
NEGATIVE_NUMBER,
STRING,
LBRACE,
RBRACE,
LBRACK,
RBRACK,
TRUE_LITERAL,
FALSE_LITERAL,
NULL_LITERAL,
WHITESPACE,
COLON,
COMMA,
ILLEGAL,
EOS
};
// A simple json parser.
template <typename Char>
class JsonParser final {
......@@ -55,7 +74,12 @@ class JsonParser final {
static const int kEndOfString = -1;
private:
Handle<String> Internalize(int start, int length);
template <typename LiteralChar>
Handle<String> MakeString(bool requires_internalization,
const Vector<const LiteralChar>& chars);
Handle<String> MakeString(bool requires_internalization, int offset,
int length);
JsonParser(Isolate* isolate, Handle<String> source);
~JsonParser();
......@@ -63,38 +87,67 @@ class JsonParser final {
// Parse a string containing a single JSON value.
MaybeHandle<Object> ParseJson();
V8_INLINE void Advance();
void advance() { ++cursor_; }
Char NextCharacter();
V8_INLINE JsonToken peek() const { return next_; }
// Advances the cursor by one character. The caller asserts (debug builds
// only, via DCHECK_EQ) that the current token is |token|; no check is made in
// release builds.
void Consume(JsonToken token) {
DCHECK_EQ(peek(), token);
advance();
}
// Requires the current token to be |token|. On a match the cursor advances by
// one character; otherwise the current token is passed to
// ReportUnexpectedCharacter() and the cursor is left where it is.
void Expect(JsonToken token) {
  if (V8_LIKELY(peek() == token)) {
    advance();
    return;
  }
  ReportUnexpectedCharacter(peek());
}
// Like Expect(), but calls SkipWhitespace() first so that the check applies
// to the token found after that call.
void ExpectNext(JsonToken token) {
SkipWhitespace();
Expect(token);
}
// Calls SkipWhitespace(), then tests whether the current token is |token|.
// Advances the cursor and returns true on a match; returns false without
// advancing (and without reporting an error) otherwise.
bool Check(JsonToken token) {
  SkipWhitespace();
  if (next_ == token) {
    advance();
    return true;
  }
  return false;
}
// Matches the fixed keyword |s| at the cursor and steps over it.
//
// |s| is a char array of N elements; N - 1 characters are treated as the
// keyword (for a string-literal argument the last element is the NUL
// terminator). The caller has already matched the first character, so only
// s[1..N-2] is compared against the input.
//
// If fewer than N - 1 characters remain, EOS is reported; if the remaining
// characters differ, an unexpected character is reported. In both error cases
// the cursor is not moved.
template <size_t N>
void ScanLiteral(const char (&s)[N]) {
  // There must be at least one character after the already-matched first one.
  STATIC_ASSERT(N > 2);
  DCHECK(!is_at_end());

  const size_t remaining = static_cast<size_t>(end_ - cursor_);
  if (V8_UNLIKELY(remaining < N - 1)) {
    ReportUnexpectedCharacter(JsonToken::EOS);
    return;
  }

  if (V8_LIKELY(CompareChars(s + 1, cursor_ + 1, N - 2) == 0)) {
    cursor_ += N - 1;
    return;
  }
  ReportUnexpectedCharacter();
}
// The JSON lexical grammar is specified in the ECMAScript 5 standard,
// section 15.12.1.1. The only allowed whitespace characters between tokens
// are tab, carriage-return, newline and space.
V8_INLINE void AdvanceSkipWhitespace();
V8_INLINE void SkipWhitespace();
V8_INLINE uc32 AdvanceGetChar();
// Checks that the current character is c.
// If so, then consume c and skip whitespace.
V8_INLINE bool MatchSkipWhiteSpace(uc32 c);
void SkipWhitespace();
// A JSON string (production JSONString) is subset of valid JavaScript string
// literals. The string must only be double-quoted (not single-quoted), and
// the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
// four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
bool ParseJsonString(Handle<String> expected);
Handle<String> ParseJsonString() {
Handle<String> result = ScanJsonString();
if (result.is_null()) return result;
return factory()->InternalizeString(result);
}
Handle<String> ParseJsonString(bool requires_internalization,
Handle<String> expected = Handle<String>());
Handle<String> ScanJsonString();
// Creates a new string and copies prefix[start..end] into the beginning
// of it. Then scans the rest of the string, adding characters after the
// prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char.
template <typename StringType, typename SinkChar>
Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
// decimal number literals.
......@@ -102,7 +155,7 @@ class JsonParser final {
// digit before and after a decimal point, may not have prefixed zeros (unless
// the integer part is zero), and may include an exponent part (e.g., "e-10").
// Hexadecimal and octal numbers are not allowed.
Handle<Object> ParseJsonNumber();
Handle<Object> ParseJsonNumber(int sign, const Char* start);
// Parse a single JSON value from input (grammar production JSONValue).
// A JSON value is either a (double-quoted) string literal, a number literal,
......@@ -118,8 +171,9 @@ class JsonParser final {
Handle<Object> ParseJsonObject();
// Helper for ParseJsonObject. Parses the form "123": obj, which is recorded
// as an element, not a property.
ParseElementResult ParseElement(Handle<JSObject> json_object);
// as an element, not a property. Returns false if we should retry parsing the
// key as a non-element. (Returns true if it's an index or hits EOS).
bool ParseElement(Handle<JSObject> json_object);
// Parses a JSON array literal (grammar production JSONArray). An array
// literal is a square-bracketed and comma separated sequence (possibly empty)
......@@ -128,12 +182,8 @@ class JsonParser final {
// it allow a terminal comma, like a JavaScript array does.
Handle<Object> ParseJsonArray();
// Mark that a parsing error has happened at the current token, and
// return a null handle. Primarily for readability.
inline Handle<Object> ReportUnexpectedCharacter() {
return Handle<Object>::null();
}
// Mark that a parsing error has happened at the current character.
void ReportUnexpectedCharacter(JsonToken token = JsonToken::ILLEGAL);
inline Isolate* isolate() { return isolate_; }
inline Factory* factory() { return isolate_->factory(); }
......@@ -151,48 +201,58 @@ class JsonParser final {
DisallowHeapAllocation no_gc;
const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc);
if (chars_ != chars) {
size_t position = cursor_ - chars_;
size_t length = end_ - chars_;
chars_ = chars;
cursor_ = chars_ + position;
end_ = chars_ + length;
}
}
private:
static const bool kIsOneByte = sizeof(Char) == 1;
static const int kMaxInternalizedStringValueLength = 25;
Zone* zone() { return &zone_; }
// Casts |c| to uc32 avoiding LiteralBuffer::AddChar(char) in one-byte-strings
// with escapes that can result in two-byte strings.
void AddLiteralChar(uc32 c) { literal_buffer_.AddChar(c); }
void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map,
Vector<const Handle<Object>> properties);
const Vector<const Handle<Object>>& properties);
bool is_at_end() const {
DCHECK_LE(cursor_, end_);
return cursor_ == end_;
}
int position() const { return static_cast<int>(cursor_ - chars_); }
Isolate* isolate_;
Zone zone_;
const uint64_t hash_seed_;
AllocationType allocation_;
Handle<JSFunction> object_constructor_;
const Handle<String> original_source_;
Handle<String> source_;
int offset_;
int length_;
// Cached pointer to the raw chars in source. In case source is on-heap, we
// register an UpdatePointers callback. For this reason, chars_ should never
// be locally cached across a possible allocation. The scope in which we
// cache chars has to be guarded by a DisallowHeapAllocation scope.
// TODO(verwaest): Move chars_ and functions that operate over chars to a
// separate helper class that makes it clear that all functions need to be
// guarded.
// register an UpdatePointers callback. For this reason, chars_, cursor_ and
// end_ should never be locally cached across a possible allocation. The scope
// in which we cache chars has to be guarded by a DisallowHeapAllocation
// scope.
const Char* chars_;
const Char* cursor_;
const Char* end_;
uc32 c0_;
int position_;
JsonToken next_;
LiteralBuffer literal_buffer_;
// Indicates whether the bytes underneath source_ can relocate during GC.
bool chars_may_relocate_;
// Property handles are stored here inside ParseJsonObject.
ZoneVector<Handle<Object>> properties_;
};
template <>
Handle<String> JsonParser<uint8_t>::Internalize(int start, int length);
template <>
Handle<String> JsonParser<uint16_t>::Internalize(int start, int length);
// Explicit instantiation declarations.
extern template class JsonParser<uint8_t>;
extern template class JsonParser<uint16_t>;
......
......@@ -6477,23 +6477,21 @@ class RegExpKey : public HashTableKey {
Smi flags_;
};
// Produces the internalized one-byte string for this key from its character
// vector and hash field, via Factory::NewOneByteInternalizedString().
Handle<String> OneByteStringKey::AsHandle(Isolate* isolate) {
  auto* factory = isolate->factory();
  return factory->NewOneByteInternalizedString(string_, HashField());
}
// Produces the internalized two-byte string for this key from its character
// vector and hash field, via Factory::NewTwoByteInternalizedString().
Handle<String> TwoByteStringKey::AsHandle(Isolate* isolate) {
  auto* factory = isolate->factory();
  return factory->NewTwoByteInternalizedString(string_, HashField());
}
// Produces the internalized string for the [from_, from_ + length_) slice of
// string_, via Factory::NewOneByteInternalizedSubString().
Handle<String> SeqOneByteSubStringKey::AsHandle(Isolate* isolate) {
  auto* factory = isolate->factory();
  return factory->NewOneByteInternalizedSubString(string_, from_, length_,
                                                  HashField());
}
// Returns true when |object| (cast to String) has exactly the characters of
// this key's substring, i.e. string_[from_ .. from_ + length_).
//
// Lengths are compared first. The candidate's characters are then read
// directly as uint8_t or uint16_t, depending on its representation, and
// compared with CompareChars. Raw character pointers are only used inside the
// DisallowHeapAllocation scope.
//
// The older signature line and the older Vector/IsOneByteEqualTo body were
// leftovers from the previous version: they duplicated the function head and
// returned before the length and representation checks could run.
bool SeqOneByteSubStringKey::IsMatch(Object object) {
  DisallowHeapAllocation no_gc;
  String string = String::cast(object);
  if (string.length() != length_) return false;
  if (string.IsOneByteRepresentation()) {
    const uint8_t* data = string.GetChars<uint8_t>(no_gc);
    return CompareChars(string_->GetChars(no_gc) + from_, data, length_) == 0;
  }
  const uint16_t* data = string.GetChars<uint16_t>(no_gc);
  return CompareChars(string_->GetChars(no_gc) + from_, data, length_) == 0;
}
// InternalizedStringKey carries a string/internalized-string object as key.
......
......@@ -19,7 +19,7 @@
#include "src/objects/shared-function-info.h"
#include "src/objects/templates-inl.h"
#include "src/property.h"
#include "src/transitions.h"
#include "src/transitions-inl.h"
// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"
......
......@@ -195,29 +195,45 @@ Char FlatStringReader::Get(int index) {
}
template <typename Char>
class SequentialStringKey : public StringTableKey {
class SequentialStringKey final : public StringTableKey {
public:
explicit SequentialStringKey(Vector<const Char> string, uint64_t seed)
: StringTableKey(StringHasher::HashSequentialString<Char>(
string.begin(), string.length(), seed)),
string_(string) {}
SequentialStringKey(const Vector<const Char>& chars, uint64_t seed)
: SequentialStringKey(StringHasher::HashSequentialString<Char>(
chars.begin(), chars.length(), seed),
chars) {}
Vector<const Char> string_;
};
SequentialStringKey(int hash, const Vector<const Char>& chars)
: StringTableKey(hash), chars_(chars) {}
class OneByteStringKey : public SequentialStringKey<uint8_t> {
public:
OneByteStringKey(Vector<const uint8_t> str, uint64_t seed)
: SequentialStringKey<uint8_t>(str, seed) {}
bool IsMatch(Object other) override {
DisallowHeapAllocation no_gc;
String s = String::cast(other);
if (s.length() != chars_.length()) return false;
if (s->IsOneByteRepresentation()) {
const uint8_t* chars = s.GetChars<uint8_t>(no_gc);
return CompareChars(chars, chars_.begin(), chars_.length()) == 0;
}
const uint16_t* chars = s.GetChars<uint16_t>(no_gc);
return CompareChars(chars, chars_.begin(), chars_.length()) == 0;
}
bool IsMatch(Object string) override {
return String::cast(string)->IsOneByteEqualTo(string_);
Handle<String> AsHandle(Isolate* isolate) override {
if (sizeof(Char) == 1) {
return isolate->factory()->NewOneByteInternalizedString(
Vector<const uint8_t>::cast(chars_), HashField());
}
return isolate->factory()->NewTwoByteInternalizedString(
Vector<const uint16_t>::cast(chars_), HashField());
}
Handle<String> AsHandle(Isolate* isolate) override;
private:
Vector<const Char> chars_;
};
class SeqOneByteSubStringKey : public StringTableKey {
using OneByteStringKey = SequentialStringKey<uint8_t>;
using TwoByteStringKey = SequentialStringKey<uint16_t>;
class SeqOneByteSubStringKey final : public StringTableKey {
public:
// VS 2017 on official builds gives this spurious warning:
// warning C4789: buffer 'key' of size 16 bytes will be overrun; 4 bytes will
......@@ -253,18 +269,6 @@ class SeqOneByteSubStringKey : public StringTableKey {
int length_;
};
class TwoByteStringKey : public SequentialStringKey<uc16> {
public:
explicit TwoByteStringKey(Vector<const uc16> str, uint64_t seed)
: SequentialStringKey<uc16>(str, seed) {}
bool IsMatch(Object string) override {
return String::cast(string)->IsTwoByteEqualTo(string_);
}
Handle<String> AsHandle(Isolate* isolate) override;
};
bool String::Equals(String other) {
if (other == *this) return true;
if (this->IsInternalizedString() && other->IsInternalizedString()) {
......@@ -281,6 +285,13 @@ bool String::Equals(Isolate* isolate, Handle<String> one, Handle<String> two) {
return SlowEquals(isolate, one, two);
}
// Returns a pointer to this string's characters as Char. External strings are
// read through CharTraits<Char>::ExternalString; every other shape is cast to
// CharTraits<Char>::String, whose GetChars() takes the |no_gc| witness.
template <typename Char>
const Char* String::GetChars(const DisallowHeapAllocation& no_gc) {
  if (StringShape(*this).IsExternal()) {
    return CharTraits<Char>::ExternalString::cast(*this).GetChars();
  }
  return CharTraits<Char>::String::cast(*this).GetChars(no_gc);
}
Handle<String> String::Flatten(Isolate* isolate, Handle<String> string,
AllocationType allocation) {
if (string->IsConsString()) {
......
......@@ -146,6 +146,10 @@ class String : public Name {
V8_INLINE Vector<const Char> GetCharVector(
const DisallowHeapAllocation& no_gc);
// Get chars from sequential or external strings.
template <typename Char>
inline const Char* GetChars(const DisallowHeapAllocation& no_gc);
// Get and set the length of the string.
inline int length() const;
inline void set_length(int value);
......
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/parsing/literal-buffer.h"
#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/memcopy.h"
namespace v8 {
namespace internal {
// Internalizes the buffered literal through the isolate's factory, using the
// one-byte or two-byte entry point that matches the buffer's current mode.
Handle<String> LiteralBuffer::Internalize(Isolate* isolate) const {
  auto* factory = isolate->factory();
  if (!is_one_byte()) {
    return factory->InternalizeTwoByteString(two_byte_literal());
  }
  return factory->InternalizeOneByteString(one_byte_literal());
}
// Computes the capacity to grow to from |min_capacity|: multiply by
// kGrowthFactor while that adds less than kMaxGrowth, otherwise add exactly
// kMaxGrowth.
int LiteralBuffer::NewCapacity(int min_capacity) {
  const int multiplicative_limit = kMaxGrowth / (kGrowthFactor - 1);
  if (min_capacity >= multiplicative_limit) {
    return min_capacity + kMaxGrowth;
  }
  return min_capacity * kGrowthFactor;
}
void LiteralBuffer::ExpandBuffer() {
int min_capacity = Max(kInitialCapacity, backing_store_.length());
Vector<byte> new_store = Vector<byte>::New(NewCapacity(min_capacity));
if (position_ > 0) {
MemCopy(new_store.begin(), backing_store_.begin(), position_);
}
backing_store_.Dispose();
backing_store_ = new_store;
}
void LiteralBuffer::ConvertToTwoByte() {
DCHECK(is_one_byte());
Vector<byte> new_store;
int new_content_size = position_ * kUC16Size;
if (new_content_size >= backing_store_.length()) {
// Ensure room for all currently read code units as UC16 as well
// as the code unit about to be stored.
new_store = Vector<byte>::New(NewCapacity(new_content_size));
} else {
new_store = backing_store_;
}
uint8_t* src = backing_store_.begin();
uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.begin());
for (int i = position_ - 1; i >= 0; i--) {
dst[i] = src[i];
}
if (new_store.begin() != backing_store_.begin()) {
backing_store_.Dispose();
backing_store_ = new_store;
}
position_ = new_content_size;
is_one_byte_ = false;
}
// Appends |code_unit| in two-byte mode. Values up to
// unibrow::Utf16::kMaxNonSurrogateCharCode are stored as one uint16_t; larger
// values are stored as a lead/trail surrogate pair. The buffer is expanded
// before any write that would start at or past its end.
void LiteralBuffer::AddTwoByteChar(uc32 code_unit) {
  DCHECK(!is_one_byte());
  if (position_ >= backing_store_.length()) ExpandBuffer();

  const bool single_unit =
      code_unit <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode);
  if (single_unit) {
    *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
    position_ += kUC16Size;
    return;
  }

  *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
      unibrow::Utf16::LeadSurrogate(code_unit);
  position_ += kUC16Size;
  // The trail surrogate may be the unit that crosses the end of the store.
  if (position_ >= backing_store_.length()) ExpandBuffer();
  *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
      unibrow::Utf16::TrailSurrogate(code_unit);
  position_ += kUC16Size;
}
} // namespace internal
} // namespace v8
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_PARSING_LITERAL_BUFFER_H_
#define V8_PARSING_LITERAL_BUFFER_H_
#include "src/unicode-decoder.h"
#include "src/vector.h"
namespace v8 {
namespace internal {
// LiteralBuffer - Collector of chars of literals.
//
// Characters are appended to a byte-addressed backing store. The buffer
// starts in one-byte mode; the first character above
// unibrow::Latin1::kMaxChar passed to AddChar(uc32) converts it to two-byte
// mode. |position_| is a byte offset into the backing store in both modes.
class LiteralBuffer final {
 public:
  LiteralBuffer() : backing_store_(), position_(0), is_one_byte_(true) {}

  // Releases the backing store explicitly via Dispose().
  ~LiteralBuffer() { backing_store_.Dispose(); }

  // Appends an ASCII character; validity is checked in debug builds only.
  // The buffer must be in one-byte mode (see AddOneByteChar).
  V8_INLINE void AddChar(char code_unit) {
    DCHECK(IsValidAscii(code_unit));
    AddOneByteChar(static_cast<byte>(code_unit));
  }

  // Appends an arbitrary code unit, switching the buffer to two-byte mode the
  // first time a value above Latin1 is seen.
  V8_INLINE void AddChar(uc32 code_unit) {
    if (is_one_byte()) {
      if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
        AddOneByteChar(static_cast<byte>(code_unit));
        return;
      }
      ConvertToTwoByte();
    }
    AddTwoByteChar(code_unit);
  }

  bool is_one_byte() const { return is_one_byte_; }

  // True when the buffer is in one-byte mode and its content is byte-for-byte
  // equal to |keyword|.
  bool Equals(Vector<const char> keyword) const {
    return is_one_byte() && keyword.length() == position_ &&
           (memcmp(keyword.begin(), backing_store_.begin(), position_) == 0);
  }

  Vector<const uint16_t> two_byte_literal() const {
    return literal<uint16_t>();
  }

  Vector<const uint8_t> one_byte_literal() const { return literal<uint8_t>(); }

  // Returns a view of the collected characters as Char. Char's width must
  // match the buffer's current mode (checked in debug builds). The shift
  // turns the byte offset into an element count: 0 for one-byte, 1 for
  // two-byte characters.
  template <typename Char>
  Vector<const Char> literal() const {
    DCHECK_EQ(is_one_byte_, sizeof(Char) == 1);
    DCHECK_EQ(position_ & (sizeof(Char) - 1), 0);
    return Vector<const Char>(
        reinterpret_cast<const Char*>(backing_store_.begin()),
        position_ >> (sizeof(Char) - 1));
  }

  // Number of characters (not bytes) collected so far.
  int length() const { return is_one_byte() ? position_ : (position_ >> 1); }

  // Resets the buffer to an empty one-byte literal. The backing store is left
  // untouched, so its storage is reused by subsequent additions.
  void Start() {
    position_ = 0;
    is_one_byte_ = true;
  }

  Handle<String> Internalize(Isolate* isolate) const;

 private:
  static const int kInitialCapacity = 16;
  static const int kGrowthFactor = 4;
  static const int kMaxGrowth = 1 * MB;

  inline bool IsValidAscii(char code_unit) {
    // Valid ASCII is the range 0-127. The comparison goes through unsigned
    // char so that it is well defined when plain char is signed (the <cctype>
    // classifiers have undefined behavior for negative arguments other than
    // EOF, and their result depends on the locale), and so that no always-true
    // ">= 0" test is needed on platforms where char is unsigned.
    return static_cast<unsigned char>(code_unit) <= 0x7F;
  }

  // Appends one byte, growing the backing store first when it is full.
  V8_INLINE void AddOneByteChar(byte one_byte_char) {
    DCHECK(is_one_byte());
    if (position_ >= backing_store_.length()) ExpandBuffer();
    backing_store_[position_] = one_byte_char;
    position_ += kOneByteSize;
  }

  void AddTwoByteChar(uc32 code_unit);
  int NewCapacity(int min_capacity);
  void ExpandBuffer();
  void ConvertToTwoByte();

  Vector<byte> backing_store_;
  int position_;  // Byte offset of the next write.
  bool is_one_byte_;

  DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
};
} // namespace internal
} // namespace v8
#endif // V8_PARSING_LITERAL_BUFFER_H_
......@@ -54,74 +54,6 @@ class Scanner::ErrorState {
Scanner::Location const old_location_;
};
// ----------------------------------------------------------------------------
// Scanner::LiteralBuffer
// Internalizes the buffered literal through the isolate's factory, using the
// one-byte or two-byte entry point that matches the buffer's current mode.
Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
  auto* factory = isolate->factory();
  if (!is_one_byte()) {
    return factory->InternalizeTwoByteString(two_byte_literal());
  }
  return factory->InternalizeOneByteString(one_byte_literal());
}
// Computes the capacity to grow to from |min_capacity|: multiply by
// kGrowthFactor while that adds less than kMaxGrowth, otherwise add exactly
// kMaxGrowth.
int Scanner::LiteralBuffer::NewCapacity(int min_capacity) {
  const int multiplicative_limit = kMaxGrowth / (kGrowthFactor - 1);
  if (min_capacity >= multiplicative_limit) {
    return min_capacity + kMaxGrowth;
  }
  return min_capacity * kGrowthFactor;
}
void Scanner::LiteralBuffer::ExpandBuffer() {
int min_capacity = Max(kInitialCapacity, backing_store_.length());
Vector<byte> new_store = Vector<byte>::New(NewCapacity(min_capacity));
if (position_ > 0) {
MemCopy(new_store.begin(), backing_store_.begin(), position_);
}
backing_store_.Dispose();
backing_store_ = new_store;
}
void Scanner::LiteralBuffer::ConvertToTwoByte() {
DCHECK(is_one_byte());
Vector<byte> new_store;
int new_content_size = position_ * kUC16Size;
if (new_content_size >= backing_store_.length()) {
// Ensure room for all currently read code units as UC16 as well
// as the code unit about to be stored.
new_store = Vector<byte>::New(NewCapacity(new_content_size));
} else {
new_store = backing_store_;
}
uint8_t* src = backing_store_.begin();
uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.begin());
for (int i = position_ - 1; i >= 0; i--) {
dst[i] = src[i];
}
if (new_store.begin() != backing_store_.begin()) {
backing_store_.Dispose();
backing_store_ = new_store;
}
position_ = new_content_size;
is_one_byte_ = false;
}
// Appends |code_unit| in two-byte mode. Values up to
// unibrow::Utf16::kMaxNonSurrogateCharCode are stored as one uint16_t; larger
// values are stored as a lead/trail surrogate pair. The buffer is expanded
// before any write that would start at or past its end.
void Scanner::LiteralBuffer::AddTwoByteChar(uc32 code_unit) {
  DCHECK(!is_one_byte());
  if (position_ >= backing_store_.length()) ExpandBuffer();

  const bool single_unit =
      code_unit <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode);
  if (single_unit) {
    *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
    position_ += kUC16Size;
    return;
  }

  *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
      unibrow::Utf16::LeadSurrogate(code_unit);
  position_ += kUC16Size;
  // The trail surrogate may be the unit that crosses the end of the store.
  if (position_ >= backing_store_.length()) ExpandBuffer();
  *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
      unibrow::Utf16::TrailSurrogate(code_unit);
  position_ += kUC16Size;
}
// ----------------------------------------------------------------------------
// Scanner::BookmarkScope
......
......@@ -14,6 +14,7 @@
#include "src/char-predicates.h"
#include "src/globals.h"
#include "src/message-template.h"
#include "src/parsing/literal-buffer.h"
#include "src/parsing/token.h"
#include "src/pointer-with-payload.h"
#include "src/unicode.h"
......@@ -429,92 +430,6 @@ class V8_EXPORT_PRIVATE Scanner {
// escape sequences are allowed.
class ErrorState;
// LiteralBuffer - Collector of chars of literals.
//
// Characters are appended to a byte-addressed backing store. The buffer
// starts in one-byte mode; the first character above
// unibrow::Latin1::kMaxChar passed to AddChar(uc32) converts it to two-byte
// mode. |position_| is a byte offset into the backing store in both modes.
class LiteralBuffer {
 public:
  LiteralBuffer() : backing_store_(), position_(0), is_one_byte_(true) {}

  // Releases the backing store explicitly via Dispose().
  ~LiteralBuffer() { backing_store_.Dispose(); }

  // Appends an ASCII character; validity is checked in debug builds only.
  // The buffer must be in one-byte mode (see AddOneByteChar).
  V8_INLINE void AddChar(char code_unit) {
    DCHECK(IsValidAscii(code_unit));
    AddOneByteChar(static_cast<byte>(code_unit));
  }

  // Appends an arbitrary code unit, switching the buffer to two-byte mode the
  // first time a value above Latin1 is seen.
  V8_INLINE void AddChar(uc32 code_unit) {
    if (is_one_byte()) {
      if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
        AddOneByteChar(static_cast<byte>(code_unit));
        return;
      }
      ConvertToTwoByte();
    }
    AddTwoByteChar(code_unit);
  }

  bool is_one_byte() const { return is_one_byte_; }

  // True when the buffer is in one-byte mode and its content is byte-for-byte
  // equal to |keyword|.
  bool Equals(Vector<const char> keyword) const {
    return is_one_byte() && keyword.length() == position_ &&
           (memcmp(keyword.begin(), backing_store_.begin(), position_) == 0);
  }

  // View of the collected characters as UTF-16 code units. Only valid in
  // two-byte mode, where |position_| is even (both checked in debug builds).
  Vector<const uint16_t> two_byte_literal() const {
    DCHECK(!is_one_byte());
    DCHECK_EQ(position_ & 0x1, 0);
    return Vector<const uint16_t>(
        reinterpret_cast<const uint16_t*>(backing_store_.begin()),
        position_ >> 1);
  }

  // View of the collected characters as bytes. Only valid in one-byte mode.
  Vector<const uint8_t> one_byte_literal() const {
    DCHECK(is_one_byte());
    return Vector<const uint8_t>(
        reinterpret_cast<const uint8_t*>(backing_store_.begin()), position_);
  }

  // Number of characters (not bytes) collected so far.
  int length() const { return is_one_byte() ? position_ : (position_ >> 1); }

  // Resets the buffer to an empty one-byte literal. The backing store is left
  // untouched, so its storage is reused by subsequent additions.
  void Start() {
    position_ = 0;
    is_one_byte_ = true;
  }

  Handle<String> Internalize(Isolate* isolate) const;

 private:
  static const int kInitialCapacity = 16;
  static const int kGrowthFactor = 4;
  static const int kMaxGrowth = 1 * MB;

  inline bool IsValidAscii(char code_unit) {
    // Valid ASCII is the range 0-127. The comparison goes through unsigned
    // char so that it is well defined when plain char is signed (the <cctype>
    // classifiers have undefined behavior for negative arguments other than
    // EOF, and their result depends on the locale), and so that no always-true
    // ">= 0" test is needed on platforms where char is unsigned.
    return static_cast<unsigned char>(code_unit) <= 0x7F;
  }

  // Appends one byte, growing the backing store first when it is full.
  V8_INLINE void AddOneByteChar(byte one_byte_char) {
    DCHECK(is_one_byte());
    if (position_ >= backing_store_.length()) ExpandBuffer();
    backing_store_[position_] = one_byte_char;
    position_ += kOneByteSize;
  }

  void AddTwoByteChar(uc32 code_unit);
  int NewCapacity(int min_capacity);
  void ExpandBuffer();
  void ConvertToTwoByte();

  Vector<byte> backing_store_;
  int position_;  // Byte offset of the next write.
  bool is_one_byte_;

  DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
};
// The current and look-ahead token.
struct TokenDesc {
Location location = {0, 0};
......
......@@ -176,6 +176,49 @@ int TransitionArray::SearchName(Name name, int* out_insertion_index) {
out_insertion_index);
}
// Builds an accessor over a raw |map|. |no_gc| is never read; it is only
// handed to USE().
TransitionsAccessor::TransitionsAccessor(Isolate* isolate, Map map,
                                         DisallowHeapAllocation* no_gc)
    : isolate_(isolate), map_(map) {
  USE(no_gc);
  Initialize();
}
// Builds an accessor over a handlified |map|. Both the handle and the
// dereferenced map are stored before Initialize() runs.
TransitionsAccessor::TransitionsAccessor(Isolate* isolate, Handle<Map> map)
    : isolate_(isolate),
      map_handle_(map),
      map_(*map) {
  Initialize();
}
// Refreshes map_ from map_handle_ and re-runs Initialize().
void TransitionsAccessor::Reload() {
  // Only meaningful when a map handle was stored on this accessor.
  DCHECK(!map_handle_.is_null());

  map_ = *map_handle_;
  Initialize();
}
void TransitionsAccessor::Initialize() {
raw_transitions_ = map_->raw_transitions();
HeapObject heap_object;
if (raw_transitions_->IsSmi() || raw_transitions_->IsCleared()) {
encoding_ = kUninitialized;
} else if (raw_transitions_->IsWeak()) {
encoding_ = kWeakRef;
} else if (raw_transitions_->GetHeapObjectIfStrong(&heap_object)) {
if (heap_object->IsTransitionArray()) {
encoding_ = kFullTransitionArray;
} else if (heap_object->IsPrototypeInfo()) {
encoding_ = kPrototypeInfo;
} else {
DCHECK(map_->is_deprecated());
DCHECK(heap_object->IsMap());
encoding_ = kMigrationTarget;
}
} else {
UNREACHABLE();
}
#if DEBUG
needs_reload_ = false;
#endif
}
int TransitionArray::number_of_transitions() const {
if (length() < kFirstIndex) return 0;
return Get(kTransitionLengthIndex).ToSmi().value();
......@@ -243,6 +286,33 @@ void TransitionArray::SetNumberOfTransitions(int number_of_transitions) {
MaybeObject::FromSmi(Smi::FromInt(number_of_transitions)));
}
// Returns the key of the single transition when the transitions slot is a
// weak reference to a target map whose simple-target details describe a
// field-located property with attributes NONE and whose key is a String.
// In every other case a null handle is returned.
Handle<String> TransitionsAccessor::ExpectedTransitionKey() {
  DisallowHeapAllocation no_gc;
  switch (encoding()) {
    case kWeakRef: {
      Map simple_target =
          Map::cast(raw_transitions_->GetHeapObjectAssumeWeak());
      PropertyDetails target_details = GetSimpleTargetDetails(simple_target);
      if (target_details.location() != kField) return Handle<String>::null();
      DCHECK_EQ(kData, target_details.kind());
      if (target_details.attributes() != NONE) return Handle<String>::null();
      Name key = GetSimpleTransitionKey(simple_target);
      return key->IsString() ? handle(String::cast(key), isolate_)
                             : Handle<String>::null();
    }
    // None of the other encodings yields an expected key.
    case kPrototypeInfo:
    case kUninitialized:
    case kMigrationTarget:
    case kFullTransitionArray:
      return Handle<String>::null();
  }
  UNREACHABLE();
}
// Returns a handle to the target at index 0. DCHECKs that
// ExpectedTransitionKey() yields a non-null key.
Handle<Map> TransitionsAccessor::ExpectedTransitionTarget() {
  DCHECK(!ExpectedTransitionKey().is_null());
  Map expected_target = GetTarget(0);
  return handle(expected_target, isolate_);
}
} // namespace internal
} // namespace v8
......
......@@ -11,31 +11,6 @@
namespace v8 {
namespace internal {
void TransitionsAccessor::Initialize() {
raw_transitions_ = map_->raw_transitions();
HeapObject heap_object;
if (raw_transitions_->IsSmi() || raw_transitions_->IsCleared()) {
encoding_ = kUninitialized;
} else if (raw_transitions_->IsWeak()) {
encoding_ = kWeakRef;
} else if (raw_transitions_->GetHeapObjectIfStrong(&heap_object)) {
if (heap_object->IsTransitionArray()) {
encoding_ = kFullTransitionArray;
} else if (heap_object->IsPrototypeInfo()) {
encoding_ = kPrototypeInfo;
} else {
DCHECK(map_->is_deprecated());
DCHECK(heap_object->IsMap());
encoding_ = kMigrationTarget;
}
} else {
UNREACHABLE();
}
#if DEBUG
needs_reload_ = false;
#endif
}
Map TransitionsAccessor::GetSimpleTransition() {
switch (encoding()) {
case kWeakRef:
......@@ -262,33 +237,6 @@ MaybeHandle<Map> TransitionsAccessor::FindTransitionToDataProperty(
return Handle<Map>(target, isolate_);
}
// Returns the key of the single transition when the transitions slot is a
// weak reference to a target map whose simple-target details describe a
// field-located property with attributes NONE and whose key is a String.
// In every other case a null handle is returned.
Handle<String> TransitionsAccessor::ExpectedTransitionKey() {
  DisallowHeapAllocation no_gc;
  switch (encoding()) {
    case kWeakRef: {
      Map simple_target =
          Map::cast(raw_transitions_->GetHeapObjectAssumeWeak());
      PropertyDetails target_details = GetSimpleTargetDetails(simple_target);
      if (target_details.location() != kField) return Handle<String>::null();
      DCHECK_EQ(kData, target_details.kind());
      if (target_details.attributes() != NONE) return Handle<String>::null();
      Name key = GetSimpleTransitionKey(simple_target);
      return key->IsString() ? handle(String::cast(key), isolate_)
                             : Handle<String>::null();
    }
    // None of the other encodings yields an expected key.
    case kPrototypeInfo:
    case kUninitialized:
    case kMigrationTarget:
    case kFullTransitionArray:
      return Handle<String>::null();
  }
  UNREACHABLE();
}
// Returns a handle to the target at index 0. DCHECKs that
// ExpectedTransitionKey() yields a non-null key.
Handle<Map> TransitionsAccessor::ExpectedTransitionTarget() {
  DCHECK(!ExpectedTransitionKey().is_null());
  Map expected_target = GetTarget(0);
  return handle(expected_target, isolate_);
}
bool TransitionsAccessor::CanHaveMoreTransitions() {
if (map_->is_dictionary_map()) return false;
if (encoding() == kFullTransitionArray) {
......
......@@ -38,16 +38,9 @@ namespace internal {
// cleared when the map they refer to is not otherwise reachable.
class V8_EXPORT_PRIVATE TransitionsAccessor {
public:
// Builds an accessor over a raw |map|. |no_gc| is never read; it is only
// handed to USE().
TransitionsAccessor(Isolate* isolate, Map map, DisallowHeapAllocation* no_gc)
    : isolate_(isolate), map_(map) {
  USE(no_gc);
  Initialize();
}
// Builds an accessor over a handlified |map|. Both the handle and the
// dereferenced map are stored before Initialize() runs.
TransitionsAccessor(Isolate* isolate, Handle<Map> map)
    : isolate_(isolate),
      map_handle_(map),
      map_(*map) {
  Initialize();
}
inline TransitionsAccessor(Isolate* isolate, Map map,
DisallowHeapAllocation* no_gc);
inline TransitionsAccessor(Isolate* isolate, Handle<Map> map);
// Insert a new transition into |map|'s transition array, extending it
// as necessary.
// Requires the constructor that takes a Handle<Map> to have been used.
......@@ -70,8 +63,8 @@ class V8_EXPORT_PRIVATE TransitionsAccessor {
return FindTransitionToDataProperty(name, kFieldOnly);
}
Handle<String> ExpectedTransitionKey();
Handle<Map> ExpectedTransitionTarget();
inline Handle<String> ExpectedTransitionKey();
inline Handle<Map> ExpectedTransitionTarget();
int NumberOfTransitions();
// The size of transition arrays are limited so they do not end up in large
......@@ -143,11 +136,7 @@ class V8_EXPORT_PRIVATE TransitionsAccessor {
kFullTransitionArray,
};
// Refreshes map_ from map_handle_ and re-runs Initialize().
void Reload() {
  // Only meaningful when a map handle was stored on this accessor.
  DCHECK(!map_handle_.is_null());

  map_ = *map_handle_;
  Initialize();
}
inline void Reload();
inline Encoding encoding() {
DCHECK(!needs_reload_);
......@@ -170,7 +159,7 @@ class V8_EXPORT_PRIVATE TransitionsAccessor {
#endif
}
void Initialize();
inline void Initialize();
inline Map GetSimpleTransition();
bool HasSimpleTransitionTo(Map map);
......
......@@ -133,7 +133,7 @@ class Utf16 {
class Latin1 {
public:
static const unsigned kMaxChar = 0xff;
static const uint16_t kMaxChar = 0xff;
// Convert the character to Latin-1 case equivalent if possible.
static inline uint16_t TryConvertToLatin1(uint16_t c) {
switch (c) {
......
......@@ -24,7 +24,7 @@
#include "src/objects/ordered-hash-table-inl.h"
#include "src/objects/smi.h"
#include "src/snapshot/code-serializer.h"
#include "src/transitions.h"
#include "src/transitions-inl.h"
#include "src/wasm/wasm-engine.h"
#include "src/wasm/wasm-objects-inl.h"
#include "src/wasm/wasm-result.h"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment