Commit 4b60b40a authored by Toon Verwaest, committed by Commit Bot

[json] Make json parsing iterative

This avoids the need to throw range errors when we run out of stack, limiting
us only by available memory.

The main parser loop is implemented by two subloops.

The first subloop finishes whenever it generates primitive values, empty
arrays, or empty objects. If a non-empty object or array is started, the loop
continues to parse its first member.

The second subloop consumes produced values and either adds them to the parent
array or object, or returns it. The second loop finishes whenever a next value
needs to be produced. When the loop itself produces a finished array or object,
the loop continues.

Exceptions are handled by moving the cursor to end-of-input. Upon end-of-input,
the first loop sets the continuation to "kFail". That causes the second loop to
tear down the continuation stack and related handle scopes, resulting in an
empty handle.

The CL additionally buffers all named properties and elements so we can
immediately allocate a correctly shaped object. For object elements we'll use a
flat array or a dictionary encoding, depending on which is more efficient.

This means that element handles are now allocated in their parent HandleScope,
rather than in local HandleScopes per property (of big objects), which is
why I've adjusted the handle-count test to not allocate as many properties. In
the future it would be nice to not have to allocate (as many) handles, since
almost everything in the JSON graph will survive JSON parsing...

Bug: chromium:710383
Change-Id: Ia3a7fd0ac260fb1c0e5f929276792b2f8e5fc0ca
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1609802
Reviewed-by: Hannes Payer <hpayer@chromium.org>
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61533}
parent c39cabbc
...@@ -29,14 +29,6 @@ const Handle<T> Handle<T>::cast(Handle<S> that) { ...@@ -29,14 +29,6 @@ const Handle<T> Handle<T>::cast(Handle<S> that) {
return Handle<T>(that.location_); return Handle<T>(that.location_);
} }
// Opens a handle scope on |isolate|. Records the isolate together with the
// `next` and `limit` values currently stored in its HandleScopeData, then
// increments that data's `level`.
HandleScope::HandleScope(Isolate* isolate) {
  isolate_ = isolate;
  HandleScopeData* scope_data = isolate->handle_scope_data();
  prev_limit_ = scope_data->limit;
  prev_next_ = scope_data->next;
  ++scope_data->level;
}
template <typename T> template <typename T>
Handle<T>::Handle(T object, Isolate* isolate) Handle<T>::Handle(T object, Isolate* isolate)
: HandleBase(object.ptr(), isolate) {} : HandleBase(object.ptr(), isolate) {}
...@@ -51,24 +43,45 @@ inline std::ostream& operator<<(std::ostream& os, Handle<T> handle) { ...@@ -51,24 +43,45 @@ inline std::ostream& operator<<(std::ostream& os, Handle<T> handle) {
return os << Brief(*handle); return os << Brief(*handle);
} }
// Opens a handle scope on |isolate|. Records the isolate together with the
// `next` and `limit` values currently stored in its HandleScopeData, then
// increments that data's `level`.
HandleScope::HandleScope(Isolate* isolate) {
  isolate_ = isolate;
  HandleScopeData* scope_data = isolate->handle_scope_data();
  prev_limit_ = scope_data->limit;
  prev_next_ = scope_data->next;
  ++scope_data->level;
}
// Move constructor: adopts |other|'s isolate and its saved next/limit
// pointers, then nulls other.isolate_ to mark |other| as moved-from.
HandleScope::HandleScope(HandleScope&& other) V8_NOEXCEPT
    : isolate_(other.isolate_),
      prev_next_(other.prev_next_),
      prev_limit_(other.prev_limit_) {
  other.isolate_ = nullptr;
}
HandleScope::~HandleScope() { HandleScope::~HandleScope() {
#ifdef DEBUG if (isolate_ == nullptr) return;
if (FLAG_check_handle_count) { CloseScope(isolate_, prev_next_, prev_limit_);
int before = NumberOfHandles(isolate_); }
CloseScope(isolate_, prev_next_, prev_limit_);
int after = NumberOfHandles(isolate_); HandleScope& HandleScope::operator=(HandleScope&& other) V8_NOEXCEPT {
DCHECK_LT(after - before, kCheckHandleThreshold); if (isolate_ == nullptr) {
DCHECK_LT(before, kCheckHandleThreshold); isolate_ = other.isolate_;
} else { } else {
#endif // DEBUG DCHECK_EQ(isolate_, other.isolate_);
CloseScope(isolate_, prev_next_, prev_limit_); CloseScope(isolate_, prev_next_, prev_limit_);
#ifdef DEBUG
} }
#endif // DEBUG prev_next_ = other.prev_next_;
prev_limit_ = other.prev_limit_;
other.isolate_ = nullptr;
return *this;
} }
void HandleScope::CloseScope(Isolate* isolate, Address* prev_next, void HandleScope::CloseScope(Isolate* isolate, Address* prev_next,
Address* prev_limit) { Address* prev_limit) {
#ifdef DEBUG
int before = FLAG_check_handle_count ? NumberOfHandles(isolate) : 0;
#endif
DCHECK_NOT_NULL(isolate);
HandleScopeData* current = isolate->handle_scope_data(); HandleScopeData* current = isolate->handle_scope_data();
std::swap(current->next, prev_next); std::swap(current->next, prev_next);
...@@ -86,6 +99,11 @@ void HandleScope::CloseScope(Isolate* isolate, Address* prev_next, ...@@ -86,6 +99,11 @@ void HandleScope::CloseScope(Isolate* isolate, Address* prev_next,
current->next, current->next,
static_cast<size_t>(reinterpret_cast<Address>(limit) - static_cast<size_t>(reinterpret_cast<Address>(limit) -
reinterpret_cast<Address>(current->next))); reinterpret_cast<Address>(current->next)));
#ifdef DEBUG
int after = FLAG_check_handle_count ? NumberOfHandles(isolate) : 0;
DCHECK_LT(after - before, kCheckHandleThreshold);
DCHECK_LT(before, kCheckHandleThreshold);
#endif
} }
template <typename T> template <typename T>
......
...@@ -187,9 +187,12 @@ inline std::ostream& operator<<(std::ostream& os, Handle<T> handle); ...@@ -187,9 +187,12 @@ inline std::ostream& operator<<(std::ostream& os, Handle<T> handle);
class HandleScope { class HandleScope {
public: public:
explicit inline HandleScope(Isolate* isolate); explicit inline HandleScope(Isolate* isolate);
inline HandleScope(HandleScope&& other) V8_NOEXCEPT;
inline ~HandleScope(); inline ~HandleScope();
inline HandleScope& operator=(HandleScope&& other) V8_NOEXCEPT;
// Counts the number of allocated handles. // Counts the number of allocated handles.
V8_EXPORT_PRIVATE static int NumberOfHandles(Isolate* isolate); V8_EXPORT_PRIVATE static int NumberOfHandles(Isolate* isolate);
......
...@@ -2917,7 +2917,7 @@ Handle<JSObject> Factory::NewJSObjectFromMap( ...@@ -2917,7 +2917,7 @@ Handle<JSObject> Factory::NewJSObjectFromMap(
DCHECK(js_obj->HasFastElements() || js_obj->HasFixedTypedArrayElements() || DCHECK(js_obj->HasFastElements() || js_obj->HasFixedTypedArrayElements() ||
js_obj->HasFastStringWrapperElements() || js_obj->HasFastStringWrapperElements() ||
js_obj->HasFastArgumentsElements()); js_obj->HasFastArgumentsElements() || js_obj->HasDictionaryElements());
return js_obj; return js_obj;
} }
......
This diff is collapsed.
...@@ -8,7 +8,6 @@ ...@@ -8,7 +8,6 @@
#include "src/heap/factory.h" #include "src/heap/factory.h"
#include "src/isolate.h" #include "src/isolate.h"
#include "src/objects.h" #include "src/objects.h"
#include "src/parsing/literal-buffer.h"
#include "src/zone/zone-containers.h" #include "src/zone/zone-containers.h"
namespace v8 { namespace v8 {
...@@ -16,6 +15,88 @@ namespace internal { ...@@ -16,6 +15,88 @@ namespace internal {
enum ParseElementResult { kElementFound, kElementNotFound }; enum ParseElementResult { kElementFound, kElementNotFound };
// Compact description of a string seen by the JSON scanner. An instance holds
// either an element index (is_index() is true; read it with index()) or a
// start/length pair plus three flags. Every accessor except is_index()
// DCHECKs that it is used on the matching variant.
// NOTE(review): start/length presumably address the parser's source text --
// confirm against the scanner.
class JsonString final {
 public:
  // Non-index string with start 0, length 0 and every flag cleared.
  JsonString()
      : start_(0),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(false) {}

  // Index variant: stores |index|; length and the other flags are cleared.
  explicit JsonString(uint32_t index)
      : index_(index),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(true) {}

  // Non-index variant. internalize() reports true when it was requested
  // explicitly or when |length| does not exceed
  // kMaxInternalizedStringValueLength.
  JsonString(int start, int length, bool needs_conversion,
             bool needs_internalization, bool has_escape)
      : start_(start),
        length_(length),
        needs_conversion_(needs_conversion),
        internalize_(length <= kMaxInternalizedStringValueLength ||
                     needs_internalization),
        has_escape_(has_escape),
        is_index_(false) {}

  // True when this instance carries an index rather than a start/length pair.
  bool is_index() const { return is_index_; }

  // Stored index; only valid for the index variant.
  uint32_t index() const {
    DCHECK(is_index_);
    return index_;
  }

  // The accessors below are only valid for the non-index variant.
  int start() const {
    DCHECK(!is_index_);
    return start_;
  }

  int length() const {
    DCHECK(!is_index_);
    return length_;
  }

  bool has_escape() const {
    DCHECK(!is_index_);
    return has_escape_;
  }

  bool needs_conversion() const {
    DCHECK(!is_index_);
    return needs_conversion_;
  }

  bool internalize() const {
    DCHECK(!is_index_);
    return internalize_;
  }

 private:
  // Strings of at most this length are always flagged for internalization.
  static const int kMaxInternalizedStringValueLength = 25;

  // start_ and index_ share storage; is_index_ tells which one is active.
  union {
    const int start_;
    const uint32_t index_;
  };
  const int length_;
  const bool needs_conversion_ : 1;
  const bool internalize_ : 1;
  const bool has_escape_ : 1;
  const bool is_index_ : 1;
};
// Pairs a scanned JsonString with a value handle. The explicit constructor
// sets only `string`; `value` is left default-constructed.
struct JsonProperty {
  explicit JsonProperty(const JsonString& string) : string(string) {}

  // Declared but never meant to run: executing it hits UNREACHABLE().
  JsonProperty() { UNREACHABLE(); }

  JsonString string;
  Handle<Object> value;
};
class JsonParseInternalizer { class JsonParseInternalizer {
public: public:
static MaybeHandle<Object> Internalize(Isolate* isolate, static MaybeHandle<Object> Internalize(Isolate* isolate,
...@@ -37,7 +118,6 @@ class JsonParseInternalizer { ...@@ -37,7 +118,6 @@ class JsonParseInternalizer {
enum class JsonToken : uint8_t { enum class JsonToken : uint8_t {
NUMBER, NUMBER,
NEGATIVE_NUMBER,
STRING, STRING,
LBRACE, LBRACE,
RBRACE, RBRACE,
...@@ -74,12 +154,25 @@ class JsonParser final { ...@@ -74,12 +154,25 @@ class JsonParser final {
static const int kEndOfString = -1; static const int kEndOfString = -1;
private: private:
template <typename LiteralChar> struct JsonContinuation {
Handle<String> MakeString(bool requires_internalization, enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement };
const Vector<const LiteralChar>& chars); JsonContinuation(Isolate* isolate, Type type, size_t index)
: scope(isolate),
Handle<String> MakeString(bool requires_internalization, int offset, type_(type),
int length); index(static_cast<uint32_t>(index)),
max_index(0),
elements(0) {}
Type type() const { return static_cast<Type>(type_); }
void set_type(Type type) { type_ = static_cast<uint8_t>(type); }
HandleScope scope;
// Unfortunately GCC doesn't like packing Type in two bits.
uint32_t type_ : 2;
uint32_t index : 30;
uint32_t max_index;
uint32_t elements;
};
JsonParser(Isolate* isolate, Handle<String> source); JsonParser(Isolate* isolate, Handle<String> source);
~JsonParser(); ~JsonParser();
...@@ -164,9 +257,20 @@ class JsonParser final { ...@@ -164,9 +257,20 @@ class JsonParser final {
// literals. The string must only be double-quoted (not single-quoted), and // literals. The string must only be double-quoted (not single-quoted), and
// the only allowed backslash-escapes are ", /, \, b, f, n, r, t and // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
// four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
Handle<String> ParseJsonString(bool requires_internalization, JsonString ScanJsonString(bool needs_internalization);
Handle<String> expected = Handle<String>()); JsonString ScanJsonPropertyKey(JsonContinuation* cont);
uc32 ScanUnicodeCharacter();
Handle<String> MakeString(const JsonString& string,
Handle<String> hint = Handle<String>());
template <typename SinkChar>
void DecodeString(SinkChar* sink, int start, int length);
template <typename SinkChar>
Handle<String> DecodeString(
const JsonString& string,
Handle<typename CharTraits<SinkChar>::String> intermediate,
Handle<String> hint);
// A JSON number (production JSONNumber) is a subset of the valid JavaScript // A JSON number (production JSONNumber) is a subset of the valid JavaScript
// decimal number literals. // decimal number literals.
...@@ -174,32 +278,19 @@ class JsonParser final { ...@@ -174,32 +278,19 @@ class JsonParser final {
// digit before and after a decimal point, may not have prefixed zeros (unless // digit before and after a decimal point, may not have prefixed zeros (unless
// the integer part is zero), and may include an exponent part (e.g., "e-10"). // the integer part is zero), and may include an exponent part (e.g., "e-10").
// Hexadecimal and octal numbers are not allowed. // Hexadecimal and octal numbers are not allowed.
Handle<Object> ParseJsonNumber(int sign, const Char* start); Handle<Object> ParseJsonNumber();
// Parse a single JSON value from input (grammar production JSONValue). // Parse a single JSON value from input (grammar production JSONValue).
// A JSON value is either a (double-quoted) string literal, a number literal, // A JSON value is either a (double-quoted) string literal, a number literal,
// one of "true", "false", or "null", or an object or array literal. // one of "true", "false", or "null", or an object or array literal.
Handle<Object> ParseJsonValue(); MaybeHandle<Object> ParseJsonValue();
// Parse a JSON object literal (grammar production JSONObject). Handle<Object> BuildJsonObject(
// An object literal is a squiggly-braced and comma separated sequence const JsonContinuation& cont,
// (possibly empty) of key/value pairs, where the key is a JSON string const std::vector<JsonProperty>& property_stack);
// literal, the value is a JSON value, and the two are separated by a colon. Handle<Object> BuildJsonArray(
// A JSON array doesn't allow numbers and identifiers as keys, like a const JsonContinuation& cont,
// JavaScript array. const std::vector<Handle<Object>>& element_stack);
Handle<Object> ParseJsonObject();
// Helper for ParseJsonObject. Parses the form "123": obj, which is recorded
// as an element, not a property. Returns false if we should retry parsing the
// key as a non-element. (Returns true if it's an index or hits EOS).
bool ParseElement(Handle<JSObject> json_object);
// Parses a JSON array literal (grammar production JSONArray). An array
// literal is a square-bracketed and comma separated sequence (possibly empty)
// of JSON values.
// A JSON array doesn't allow leaving out values from the sequence, nor does
// it allow a terminal comma, like a JavaScript array does.
Handle<Object> ParseJsonArray();
// Mark that a parsing error has happened at the current character. // Mark that a parsing error has happened at the current character.
void ReportUnexpectedCharacter(uc32 c); void ReportUnexpectedCharacter(uc32 c);
...@@ -231,14 +322,6 @@ class JsonParser final { ...@@ -231,14 +322,6 @@ class JsonParser final {
private: private:
static const bool kIsOneByte = sizeof(Char) == 1; static const bool kIsOneByte = sizeof(Char) == 1;
static const int kMaxInternalizedStringValueLength = 25;
// Casts |c| to uc32 avoiding LiteralBuffer::AddChar(char) in one-byte-strings
// with escapes that can result in two-byte strings.
void AddLiteralChar(uc32 c) { literal_buffer_.AddChar(c); }
void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map,
const Vector<const Handle<Object>>& properties);
bool is_at_end() const { bool is_at_end() const {
DCHECK_LE(cursor_, end_); DCHECK_LE(cursor_, end_);
...@@ -248,7 +331,6 @@ class JsonParser final { ...@@ -248,7 +331,6 @@ class JsonParser final {
int position() const { return static_cast<int>(cursor_ - chars_); } int position() const { return static_cast<int>(cursor_ - chars_); }
Isolate* isolate_; Isolate* isolate_;
Zone zone_;
const uint64_t hash_seed_; const uint64_t hash_seed_;
JsonToken next_; JsonToken next_;
// Indicates whether the bytes underneath source_ can relocate during GC. // Indicates whether the bytes underneath source_ can relocate during GC.
...@@ -265,11 +347,6 @@ class JsonParser final { ...@@ -265,11 +347,6 @@ class JsonParser final {
const Char* cursor_; const Char* cursor_;
const Char* end_; const Char* end_;
const Char* chars_; const Char* chars_;
LiteralBuffer literal_buffer_;
// Property handles are stored here inside ParseJsonObject.
ZoneVector<Handle<Object>> properties_;
}; };
// Explicit instantiation declarations. // Explicit instantiation declarations.
......
...@@ -63,7 +63,7 @@ LookupIterator LookupIterator::PropertyOrElement( ...@@ -63,7 +63,7 @@ LookupIterator LookupIterator::PropertyOrElement(
it.name_ = name; it.name_ = name;
return it; return it;
} }
return LookupIterator(receiver, name, holder, configuration); return LookupIterator(isolate, receiver, name, holder, configuration);
} }
LookupIterator LookupIterator::PropertyOrElement( LookupIterator LookupIterator::PropertyOrElement(
......
...@@ -1008,6 +1008,17 @@ ACCESSORS(JSAsyncFromSyncIterator, next, Object, kNextOffset) ...@@ -1008,6 +1008,17 @@ ACCESSORS(JSAsyncFromSyncIterator, next, Object, kNextOffset)
ACCESSORS(JSStringIterator, string, String, kStringOffset) ACCESSORS(JSStringIterator, string, String, kStringOffset)
SMI_ACCESSORS(JSStringIterator, index, kNextIndexOffset) SMI_ACCESSORS(JSStringIterator, index, kNextIndexOffset)
// Decides whether elements should use dictionary (slow) storage. Returns true
// when |new_capacity| is at least kPreferFastElementsSizeFactor times the
// dictionary footprint (ComputeCapacity(|used_elements|) * kEntrySize), i.e.
// when fast backing storage would take up much more memory than a dictionary.
static inline bool ShouldConvertToSlowElements(uint32_t used_elements,
                                               uint32_t new_capacity) {
  const auto dictionary_capacity =
      NumberDictionary::ComputeCapacity(used_elements);
  const uint32_t size_threshold =
      NumberDictionary::kPreferFastElementsSizeFactor * dictionary_capacity *
      NumberDictionary::kEntrySize;
  return new_capacity >= size_threshold;
}
static inline bool ShouldConvertToSlowElements(JSObject object, static inline bool ShouldConvertToSlowElements(JSObject object,
uint32_t capacity, uint32_t capacity,
uint32_t index, uint32_t index,
...@@ -1027,13 +1038,8 @@ static inline bool ShouldConvertToSlowElements(JSObject object, ...@@ -1027,13 +1038,8 @@ static inline bool ShouldConvertToSlowElements(JSObject object,
ObjectInYoungGeneration(object))) { ObjectInYoungGeneration(object))) {
return false; return false;
} }
// If the fast-case backing storage takes up much more memory than a return ShouldConvertToSlowElements(object->GetFastElementsUsage(),
// dictionary backing storage would, the object should have slow elements. *new_capacity);
int used_elements = object->GetFastElementsUsage();
uint32_t size_threshold = NumberDictionary::kPreferFastElementsSizeFactor *
NumberDictionary::ComputeCapacity(used_elements) *
NumberDictionary::kEntrySize;
return size_threshold <= *new_capacity;
} }
} // namespace internal } // namespace internal
......
...@@ -30,4 +30,5 @@ for (var i = 0; i < 100000; i++) { ...@@ -30,4 +30,5 @@ for (var i = 0; i < 100000; i++) {
str = "[1," + str + "]"; str = "[1," + str + "]";
} }
assertThrows(function() { JSON.parse(str); }, RangeError); // Make sure we don't overflow on very deeply nested JSON objects.
JSON.parse(str);
...@@ -72,7 +72,7 @@ function generate(n) { ...@@ -72,7 +72,7 @@ function generate(n) {
print("generating"); print("generating");
var str = generate(50000); var str = generate(30000);
print("parsing " + str.length); print("parsing " + str.length);
JSON.parse(str); JSON.parse(str);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment