Commit 6366f334 authored by Seth Brenith's avatar Seth Brenith Committed by V8 LUCI CQ

Fix leaks due to deoptimization literals

The GC already treats some embedded object pointers in Code as weak,
based on Code::IsWeakObject. If one of those embedded objects ends up
unmarked during a full mark-collect GC, then the Code is marked for lazy
deoptimization and the embedded objects are cleared. However, many of
those same objects are often held strongly by the deoptimization literal
array for the Code, which causes memory leaks. This change updates the
deoptimization literals array to store those objects weakly. Any Code
currently executing on the stack might need those deoptimization
literals in order to deoptimize, so the deoptimization literal array is
marked strongly in that case.

Design document:
https://docs.google.com/document/d/1gFRBYCeqz9Mysx8CVYQkldBbk3AZLo8UX0DMLZV_7qw/edit?usp=sharing

Bug: v8:4578
Change-Id: I02e86683c59371e9f88ecf523750c9c6afebdb39
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3160299Reviewed-by: 's avatarYang Guo <yangguo@chromium.org>
Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarMichael Lippautz <mlippautz@chromium.org>
Commit-Queue: Seth Brenith <seth.brenith@microsoft.com>
Cr-Commit-Position: refs/heads/main@{#77805}
parent 9ab034ec
......@@ -980,8 +980,9 @@ Handle<DeoptimizationData> CodeGenerator::GenerateDeoptimizationData() {
data->SetSharedFunctionInfo(Smi::zero());
}
Handle<FixedArray> literals = isolate()->factory()->NewFixedArray(
static_cast<int>(deoptimization_literals_.size()), AllocationType::kOld);
Handle<DeoptimizationLiteralArray> literals =
isolate()->factory()->NewDeoptimizationLiteralArray(
static_cast<int>(deoptimization_literals_.size()));
for (unsigned i = 0; i < deoptimization_literals_.size(); i++) {
Handle<Object> object = deoptimization_literals_[i].Reify(isolate());
CHECK(!object.is_null());
......
......@@ -28,10 +28,9 @@ using base::ReadUnalignedValue;
namespace internal {
void TranslationArrayPrintSingleFrame(std::ostream& os,
TranslationArray translation_array,
int translation_index,
FixedArray literal_array) {
void TranslationArrayPrintSingleFrame(
std::ostream& os, TranslationArray translation_array, int translation_index,
DeoptimizationLiteralArray literal_array) {
DisallowGarbageCollection gc_oh_noes;
TranslationArrayIterator iterator(translation_array, translation_index);
disasm::NameConverter converter;
......@@ -725,8 +724,8 @@ void TranslatedFrame::Handlify() {
}
TranslatedFrame TranslatedState::CreateNextTranslatedFrame(
TranslationArrayIterator* iterator, FixedArray literal_array, Address fp,
FILE* trace_file) {
TranslationArrayIterator* iterator,
DeoptimizationLiteralArray literal_array, Address fp, FILE* trace_file) {
TranslationOpcode opcode = TranslationOpcodeFromInt(iterator->Next());
switch (opcode) {
case TranslationOpcode::INTERPRETED_FRAME: {
......@@ -959,8 +958,8 @@ void TranslatedState::CreateArgumentsElementsTranslatedValues(
// TranslationArrayIterator.
int TranslatedState::CreateNextTranslatedValue(
int frame_index, TranslationArrayIterator* iterator,
FixedArray literal_array, Address fp, RegisterValues* registers,
FILE* trace_file) {
DeoptimizationLiteralArray literal_array, Address fp,
RegisterValues* registers, FILE* trace_file) {
disasm::NameConverter converter;
TranslatedFrame& frame = frames_[frame_index];
......@@ -1299,8 +1298,9 @@ TranslatedState::TranslatedState(const JavaScriptFrame* frame)
void TranslatedState::Init(Isolate* isolate, Address input_frame_pointer,
Address stack_frame_pointer,
TranslationArrayIterator* iterator,
FixedArray literal_array, RegisterValues* registers,
FILE* trace_file, int formal_parameter_count,
DeoptimizationLiteralArray literal_array,
RegisterValues* registers, FILE* trace_file,
int formal_parameter_count,
int actual_argument_count) {
DCHECK(frames_.empty());
......@@ -2118,9 +2118,9 @@ bool TranslatedState::DoUpdateFeedback() {
return false;
}
void TranslatedState::ReadUpdateFeedback(TranslationArrayIterator* iterator,
FixedArray literal_array,
FILE* trace_file) {
void TranslatedState::ReadUpdateFeedback(
TranslationArrayIterator* iterator,
DeoptimizationLiteralArray literal_array, FILE* trace_file) {
CHECK_EQ(TranslationOpcode::UPDATE_FEEDBACK,
TranslationOpcodeFromInt(iterator->Next()));
feedback_vector_ = FeedbackVector::cast(literal_array.get(iterator->Next()));
......
......@@ -30,7 +30,7 @@ class TranslatedState;
void TranslationArrayPrintSingleFrame(std::ostream& os,
TranslationArray translation_array,
int translation_index,
FixedArray literal_array);
DeoptimizationLiteralArray literal_array);
// The Translated{Value,Frame,State} class hierarchy are a set of utility
// functions to work with the combination of translations (built from a
......@@ -382,7 +382,7 @@ class TranslatedState {
void Init(Isolate* isolate, Address input_frame_pointer,
Address stack_frame_pointer, TranslationArrayIterator* iterator,
FixedArray literal_array, RegisterValues* registers,
DeoptimizationLiteralArray literal_array, RegisterValues* registers,
FILE* trace_file, int parameter_count, int actual_argument_count);
void VerifyMaterializedObjects();
......@@ -397,13 +397,14 @@ class TranslatedState {
// details, see the code around ReplaceElementsArrayWithCopy.
enum Purpose { kDeoptimization, kFrameInspection };
TranslatedFrame CreateNextTranslatedFrame(TranslationArrayIterator* iterator,
FixedArray literal_array,
Address fp, FILE* trace_file);
TranslatedFrame CreateNextTranslatedFrame(
TranslationArrayIterator* iterator,
DeoptimizationLiteralArray literal_array, Address fp, FILE* trace_file);
int CreateNextTranslatedValue(int frame_index,
TranslationArrayIterator* iterator,
FixedArray literal_array, Address fp,
RegisterValues* registers, FILE* trace_file);
DeoptimizationLiteralArray literal_array,
Address fp, RegisterValues* registers,
FILE* trace_file);
Address DecompressIfNeeded(intptr_t value);
void CreateArgumentsElementsTranslatedValues(int frame_index,
Address input_frame_pointer,
......@@ -439,7 +440,8 @@ class TranslatedState {
Handle<Map> map, const DisallowGarbageCollection& no_gc);
void ReadUpdateFeedback(TranslationArrayIterator* iterator,
FixedArray literal_array, FILE* trace_file);
DeoptimizationLiteralArray literal_array,
FILE* trace_file);
TranslatedValue* ResolveCapturedObject(TranslatedValue* slot);
TranslatedValue* GetValueByObjectIndex(int object_index);
......
......@@ -543,7 +543,7 @@ void StackFrame::IteratePc(RootVisitor* v, Address* pc_address,
holder.GetHeap()->GcSafeCodeContains(holder, old_pc));
unsigned pc_offset = holder.GetOffsetFromInstructionStart(isolate_, old_pc);
Object code = holder;
v->VisitRootPointer(Root::kStackRoots, nullptr, FullObjectSlot(&code));
v->VisitRunningCode(FullObjectSlot(&code));
if (code == holder) return;
holder = Code::unchecked_cast(code);
Address pc = holder.InstructionStart(isolate_, old_pc) + pc_offset;
......@@ -1775,7 +1775,7 @@ void OptimizedFrame::GetFunctions(
DeoptimizationData const data = GetDeoptimizationData(&deopt_index);
DCHECK(!data.is_null());
DCHECK_NE(Safepoint::kNoDeoptimizationIndex, deopt_index);
FixedArray const literal_array = data.LiteralArray();
DeoptimizationLiteralArray const literal_array = data.LiteralArray();
TranslationArrayIterator it(data.TranslationByteArray(),
data.TranslationIndex(deopt_index).value());
......
......@@ -2167,6 +2167,12 @@ Handle<JSObject> Factory::NewExternal(void* value) {
return external;
}
Handle<DeoptimizationLiteralArray> Factory::NewDeoptimizationLiteralArray(
int length) {
return Handle<DeoptimizationLiteralArray>::cast(
NewWeakFixedArray(length, AllocationType::kOld));
}
Handle<Code> Factory::NewOffHeapTrampolineFor(Handle<Code> code,
Address off_heap_entry) {
CHECK_NOT_NULL(isolate()->embedded_blob_code());
......
......@@ -658,6 +658,8 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
// Create an External object for V8's external API.
Handle<JSObject> NewExternal(void* value);
Handle<DeoptimizationLiteralArray> NewDeoptimizationLiteralArray(int length);
// Allocates a new code object and initializes it as the trampoline to the
// given off-heap entry point.
Handle<Code> NewOffHeapTrampolineFor(Handle<Code> code,
......
......@@ -1046,6 +1046,36 @@ class MarkCompactCollector::RootMarkingVisitor final : public RootVisitor {
}
}
void VisitRunningCode(FullObjectSlot p) final {
Code code = Code::cast(*p);
// If Code is currently executing, then we must not remove its
// deoptimization literals, which it might need in order to successfully
// deoptimize.
//
// Must match behavior in RootsReferencesExtractor::VisitRunningCode, so
// that heap snapshots accurately describe the roots.
if (code.kind() != CodeKind::BASELINE) {
DeoptimizationData deopt_data =
DeoptimizationData::cast(code.deoptimization_data());
if (deopt_data.length() > 0) {
DeoptimizationLiteralArray literals = deopt_data.LiteralArray();
int literals_length = literals.length();
for (int i = 0; i < literals_length; ++i) {
MaybeObject maybe_literal = literals.Get(i);
HeapObject heap_literal;
if (maybe_literal.GetHeapObject(&heap_literal)) {
MarkObjectByPointer(Root::kStackRoots,
FullObjectSlot(&heap_literal));
}
}
}
}
// And then mark the Code itself.
VisitRootPointer(Root::kStackRoots, nullptr, p);
}
private:
V8_INLINE void MarkObjectByPointer(Root root, FullObjectSlot p) {
Object object = *p;
......
......@@ -44,6 +44,7 @@ CAST_ACCESSOR(Code)
CAST_ACCESSOR(CodeDataContainer)
CAST_ACCESSOR(DependentCode)
CAST_ACCESSOR(DeoptimizationData)
CAST_ACCESSOR(DeoptimizationLiteralArray)
int AbstractCode::raw_instruction_size() {
if (IsCode()) {
......@@ -1156,7 +1157,7 @@ int BytecodeArray::SizeIncludingMetadata() {
DEFINE_DEOPT_ELEMENT_ACCESSORS(TranslationByteArray, TranslationArray)
DEFINE_DEOPT_ELEMENT_ACCESSORS(InlinedFunctionCount, Smi)
DEFINE_DEOPT_ELEMENT_ACCESSORS(LiteralArray, FixedArray)
DEFINE_DEOPT_ELEMENT_ACCESSORS(LiteralArray, DeoptimizationLiteralArray)
DEFINE_DEOPT_ELEMENT_ACCESSORS(OsrBytecodeOffset, Smi)
DEFINE_DEOPT_ELEMENT_ACCESSORS(OsrPcOffset, Smi)
DEFINE_DEOPT_ELEMENT_ACCESSORS(OptimizationId, Smi)
......@@ -1184,6 +1185,42 @@ int DeoptimizationData::DeoptCount() {
return (length() - kFirstDeoptEntryIndex) / kDeoptEntrySize;
}
inline DeoptimizationLiteralArray::DeoptimizationLiteralArray(Address ptr)
: WeakFixedArray(ptr) {
// No type check is possible beyond that for WeakFixedArray.
}
inline Object DeoptimizationLiteralArray::get(int index) const {
return get(GetPtrComprCageBase(*this), index);
}
inline Object DeoptimizationLiteralArray::get(PtrComprCageBase cage_base,
int index) const {
MaybeObject maybe = Get(cage_base, index);
// Slots in the DeoptimizationLiteralArray should only be cleared when there
// is no possible code path that could need that slot. This works because the
// weakly-held deoptimization literals are basically local variables that
// TurboFan has decided not to keep on the stack. Thus, if the deoptimization
// literal goes away, then whatever code needed it should be unreachable. The
// exception is currently running Code: in that case, the deoptimization
// literals array might be the only thing keeping the target object alive.
// Thus, when a Code is running, we strongly mark all of its deoptimization
// literals.
CHECK(!maybe.IsCleared());
return maybe.GetHeapObjectOrSmi();
}
inline void DeoptimizationLiteralArray::set(int index, Object value) {
MaybeObject maybe = MaybeObject::FromObject(value);
if (value.IsHeapObject() &&
Code::IsWeakObjectInOptimizedCode(HeapObject::cast(value))) {
maybe = MaybeObject::MakeWeak(maybe);
}
Set(index, maybe);
}
} // namespace internal
} // namespace v8
......
......@@ -354,7 +354,7 @@ bool Code::Inlines(SharedFunctionInfo sfi) {
DeoptimizationData::cast(deoptimization_data());
if (data.length() == 0) return false;
if (data.SharedFunctionInfo() == sfi) return true;
FixedArray const literals = data.LiteralArray();
DeoptimizationLiteralArray const literals = data.LiteralArray();
int const inlined_count = data.InlinedFunctionCount().value();
for (int i = 0; i < inlined_count; ++i) {
if (SharedFunctionInfo::cast(literals.get(i)) == sfi) return true;
......
......@@ -992,6 +992,24 @@ class BytecodeArray
TQ_OBJECT_CONSTRUCTORS(BytecodeArray)
};
// This class holds data required during deoptimization. It does not have its
// own instance type.
class DeoptimizationLiteralArray : public WeakFixedArray {
public:
// Getters for literals. These include runtime checks that the pointer was not
// cleared, if the literal was held weakly.
inline Object get(int index) const;
inline Object get(PtrComprCageBase cage_base, int index) const;
// Setter for literals. This will set the object as strong or weak depending
// on Code::IsWeakObjectInOptimizedCode.
inline void set(int index, Object value);
DECL_CAST(DeoptimizationLiteralArray)
OBJECT_CONSTRUCTORS(DeoptimizationLiteralArray, WeakFixedArray);
};
// DeoptimizationData is a fixed array used to hold the deoptimization data for
// optimized code. It also contains information about functions that were
// inlined. If N different functions were inlined then the first N elements of
......@@ -1032,7 +1050,7 @@ class DeoptimizationData : public FixedArray {
DECL_ELEMENT_ACCESSORS(TranslationByteArray, TranslationArray)
DECL_ELEMENT_ACCESSORS(InlinedFunctionCount, Smi)
DECL_ELEMENT_ACCESSORS(LiteralArray, FixedArray)
DECL_ELEMENT_ACCESSORS(LiteralArray, DeoptimizationLiteralArray)
DECL_ELEMENT_ACCESSORS(OsrBytecodeOffset, Smi)
DECL_ELEMENT_ACCESSORS(OsrPcOffset, Smi)
DECL_ELEMENT_ACCESSORS(OptimizationId, Smi)
......
......@@ -89,6 +89,13 @@ class RootVisitor {
UNREACHABLE();
}
// Visits a single pointer which is Code from the execution stack.
virtual void VisitRunningCode(FullObjectSlot p) {
// For most visitors, currently running Code is no different than any other
// on-stack pointer.
VisitRootPointer(Root::kStackRoots, nullptr, p);
}
// Intended for serialization/deserialization checking: insert, or
// check for the presence of, a tag at this position in the stream.
// Also used for marking up GC roots in heap snapshots.
......
......@@ -1603,6 +1603,32 @@ class RootsReferencesExtractor : public RootVisitor {
}
}
void VisitRunningCode(FullObjectSlot p) override {
// Must match behavior in
// MarkCompactCollector::RootMarkingVisitor::VisitRunningCode, which treats
// deoptimization literals in running code as stack roots.
Code code = Code::cast(*p);
if (code.kind() != CodeKind::BASELINE) {
DeoptimizationData deopt_data =
DeoptimizationData::cast(code.deoptimization_data());
if (deopt_data.length() > 0) {
DeoptimizationLiteralArray literals = deopt_data.LiteralArray();
int literals_length = literals.length();
for (int i = 0; i < literals_length; ++i) {
MaybeObject maybe_literal = literals.Get(i);
HeapObject heap_literal;
if (maybe_literal.GetHeapObject(&heap_literal)) {
VisitRootPointer(Root::kStackRoots, nullptr,
FullObjectSlot(&heap_literal));
}
}
}
}
// Finally visit the Code itself.
VisitRootPointer(Root::kStackRoots, nullptr, p);
}
private:
V8HeapExplorer* explorer_;
bool visiting_weak_roots_;
......
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --expose-gc --opt
// Flags: --no-stress-opt --no-always-opt --no-assert-types
// This weak ref is for checking whether the closure-allocated object o got
// collected as it should.
var weak;
// Return a function which refers to a variable from its enclosing function.
function makeFn() {
var o = {};
weak = new WeakRef(o);
o.num = 0;
return () => { return ++o.num; };
}
// Simple wrapper function which will cause inlining.
function g(f) {
f();
}
// Optimize g so that it inlines a specific function instance created by makeFn,
// and then drop all user-visible references to that function instance.
(function () {
var fn = makeFn();
%PrepareFunctionForOptimization(g);
%PrepareFunctionForOptimization(fn);
g(fn);
%OptimizeFunctionOnNextCall(g);
g(fn);
})();
// WeakRefs created during the current microtask are strong, so defer the rest.
setTimeout(() => {
// Since the function inlined by g no longer exists, it should deopt and
// release the inner function.
gc();
// Check that the memory leak described in v8:4578 no longer happens.
assertEquals(undefined, weak.deref());
// Next, let's check the opposite case: if an optimized function's Code is
// currently running at the time of gc, then it must keep its deoptimization
// data alive.
// Another simple wrapper function, but this one causes a GC internally.
function h(f) {
gcAndCheckState(); // Non-inlined call
f(); // Inlined call
}
var doCheck = false;
function gcAndCheckState() {
if (!doCheck) return;
// It's possible that h has already deoptimized by this point if
// --stress-incremental-marking caused additional gc cycles.
var optimized = isOptimized(h);
gc();
// If the optimized code for h is still on the stack, then it must not clear
// out its own deoptimization data. Eventually h will deopt due to the wrong
// function being passed, but only after this function has returned.
if (optimized) {
assertNotEquals(undefined, weak.deref());
} else {
assertEquals(undefined, weak.deref());
}
}
// Don't inline gcAndCheckState, to avoid the possibility that its content
// could cause h to deoptimize.
%NeverOptimizeFunction(gcAndCheckState);
// Optimize h to inline a specific function instance, and then drop all
// user-visible references to that inlined function.
(function () {
var fn = makeFn();
%PrepareFunctionForOptimization(h);
%PrepareFunctionForOptimization(fn);
h(fn);
%OptimizeFunctionOnNextCall(h);
h(fn);
})();
// Defer again so the WeakRef can expire.
setTimeout(() => {
doCheck = true;
h(() => {});
}, 0);
}, 0);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment