Commit c0a63243 authored by Leszek Swirski, committed by V8 LUCI CQ

[maglev] Start implementing inlining

Add a --maglev-inlining flag, and add some half-baked support for
inlining functions when there is call feedback.

When the flag is enabled and there is call feedback, we create a nested
MaglevGraphBuilder for the called function and pause building the graph
of the outer function. We manually set up the inner builder's prologue,
filling its frame with the arguments passed into the call, then build
the body with the nested graph builder. This inner builder knows that it
is building an inlined function, so all Return bytecodes instead emit a
Jump to a single merge block at the end of the function, where execution
of the outer function can resume.

These inner-function basic blocks are wired into the outer graph with
new JumpToInlined and JumpFromInlined control nodes. The idea is that
subsequent passes will know what the inlined function is, and will use
these nodes to manage the function stack (particularly for codegen, and
especially for deopts).
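
Roughly, the wiring for a single inlined call described above looks like
this (an illustrative sketch, not part of the diff):

    outer function                       inlined function
    [call site block] --JumpToInlined--> [prologue block]
                                               |
                                          [body blocks]
                                               | (each Return becomes a Jump)
                                               v
    [resume block] <--JumpFromInlined--  [merge block]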

Bug: v8:7700
Change-Id: I4e9b153f8cf4d06c56e7be6365e7a18b86a773c0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3585958
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Jakob Linke <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80077}
parent f98e1f4d
......@@ -511,6 +511,8 @@ DEFINE_BOOL(future, FUTURE_BOOL,
#ifdef V8_ENABLE_MAGLEV
#define V8_ENABLE_MAGLEV_BOOL true
DEFINE_BOOL(maglev, false, "enable the maglev optimizing compiler")
DEFINE_BOOL(maglev_inlining, false,
"enable inlining in the maglev optimizing compiler")
#else
#define V8_ENABLE_MAGLEV_BOOL false
DEFINE_BOOL_READONLY(maglev, false, "enable the maglev optimizing compiler")
......
......@@ -4,6 +4,7 @@
#include "src/maglev/maglev-code-generator.h"
#include "src/base/hashmap.h"
#include "src/codegen/code-desc.h"
#include "src/codegen/register.h"
#include "src/codegen/safepoint-table.h"
......@@ -19,6 +20,7 @@
#include "src/maglev/maglev-ir.h"
#include "src/maglev/maglev-regalloc-data.h"
#include "src/objects/code-inl.h"
#include "src/utils/identity-map.h"
namespace v8 {
namespace internal {
......@@ -318,15 +320,15 @@ class MaglevCodeGeneratorImpl final {
}
private:
-  static constexpr int kFunctionLiteralIndex = 0;
-  static constexpr int kOptimizedOutConstantIndex = 1;
+  static constexpr int kOptimizedOutConstantIndex = 0;
MaglevCodeGeneratorImpl(MaglevCompilationInfo* compilation_info, Graph* graph)
: safepoint_table_builder_(compilation_info->zone()),
translation_array_builder_(compilation_info->zone()),
code_gen_state_(compilation_info, safepoint_table_builder()),
processor_(compilation_info, &code_gen_state_),
-        graph_(graph) {}
+        graph_(graph),
+        deopt_literals_(compilation_info->isolate()->heap()) {}
MaybeHandle<Code> Generate() {
EmitCode();
......@@ -353,6 +355,14 @@ class MaglevCodeGeneratorImpl final {
void EmitDeopts() {
deopt_exit_start_offset_ = __ pc_offset();
// We'll emit the optimized out constant a bunch of times, so to avoid
// looking it up in the literal map every time, add it now with the fixed
// offset 0.
int optimized_out_constant_index =
GetDeoptLiteral(ReadOnlyRoots(isolate()).optimized_out());
USE(optimized_out_constant_index);
DCHECK_EQ(kOptimizedOutConstantIndex, optimized_out_constant_index);
__ RecordComment("-- Non-lazy deopts");
for (EagerDeoptInfo* deopt_info : code_gen_state_.eager_deopts()) {
EmitEagerDeopt(deopt_info);
......@@ -381,39 +391,51 @@ class MaglevCodeGeneratorImpl final {
}
}
-  void EmitEagerDeopt(EagerDeoptInfo* deopt_info) {
-    int frame_count = 1;
-    int jsframe_count = 1;
-    int update_feedback_count = 0;
-    deopt_info->deopt_index = translation_array_builder_.BeginTranslation(
-        frame_count, jsframe_count, update_feedback_count);
-    const MaglevCompilationUnit& compilation_unit =
-        *code_gen_state_.compilation_info()->toplevel_compilation_unit();
+  const InputLocation* EmitDeoptFrame(const MaglevCompilationUnit& unit,
+                                      const CheckpointedInterpreterState& state,
+                                      const InputLocation* input_locations) {
+    if (state.parent) {
+      // Deopt input locations are in the order of deopt frame emission, so
+      // update the pointer after emitting the parent frame.
+      input_locations =
+          EmitDeoptFrame(*unit.caller(), *state.parent, input_locations);
+    }
     // Returns are used for updating an accumulator or register after a lazy
     // deopt.
     const int return_offset = 0;
     const int return_count = 0;
     translation_array_builder_.BeginInterpretedFrame(
-        deopt_info->state.bytecode_position, kFunctionLiteralIndex,
-        compilation_unit.register_count(), return_offset, return_count);
+        state.bytecode_position,
+        GetDeoptLiteral(*unit.shared_function_info().object()),
+        unit.register_count(), return_offset, return_count);
-    EmitDeoptFrameValues(compilation_unit, deopt_info->state.register_frame,
-                         deopt_info->input_locations,
+    return EmitDeoptFrameValues(unit, state.register_frame, input_locations,
                          interpreter::Register::invalid_value());
   }
+  void EmitEagerDeopt(EagerDeoptInfo* deopt_info) {
+    int frame_count = 1 + deopt_info->unit.inlining_depth();
+    int jsframe_count = frame_count;
+    int update_feedback_count = 0;
+    deopt_info->deopt_index = translation_array_builder_.BeginTranslation(
+        frame_count, jsframe_count, update_feedback_count);
+    EmitDeoptFrame(deopt_info->unit, deopt_info->state,
+                   deopt_info->input_locations);
+  }
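
As an illustration of the eager-deopt path above (a sketch of the depth-1
case, with placeholder names): a deopt inside a callee inlined one level
deep begins its translation with frame_count == jsframe_count == 2, and the
recursion in EmitDeoptFrame emits the caller's interpreted frame before the
callee's, advancing the input-locations pointer past the caller's values
first:

    // Illustrative translation layout for inlining_depth() == 1:
    //   BeginTranslation(frame_count=2, jsframe_count=2, ...)
    //     BeginInterpretedFrame(<caller bytecode offset>, <caller SFI literal>, ...)
    //       ... caller frame values (closure, parameters, context, locals, ...) ...
    //     BeginInterpretedFrame(<callee bytecode offset>, <callee SFI literal>, ...)
    //       ... callee frame values ...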
   void EmitLazyDeopt(LazyDeoptInfo* deopt_info) {
+    const MaglevCompilationUnit& unit = deopt_info->unit;
+    DCHECK_NULL(unit.caller());
+    DCHECK_EQ(unit.inlining_depth(), 0);
     int frame_count = 1;
     int jsframe_count = 1;
     int update_feedback_count = 0;
     deopt_info->deopt_index = translation_array_builder_.BeginTranslation(
         frame_count, jsframe_count, update_feedback_count);
-    const MaglevCompilationUnit& compilation_unit =
-        *code_gen_state_.compilation_info()->toplevel_compilation_unit();
// Return offsets are counted from the end of the translation frame, which
// is the array [parameters..., locals..., accumulator].
int return_offset;
......@@ -429,20 +451,20 @@ class MaglevCodeGeneratorImpl final {
// ^
// and this calculation gives, correctly:
// 2 + 2 - 1 = 3
-      return_offset = compilation_unit.register_count() +
-                      compilation_unit.parameter_count() -
+      return_offset = unit.register_count() + unit.parameter_count() -
                       deopt_info->result_location.ToParameterIndex();
     } else {
-      return_offset = compilation_unit.register_count() -
-                      deopt_info->result_location.index();
+      return_offset =
+          unit.register_count() - deopt_info->result_location.index();
     }
// TODO(leszeks): Support lazy deopts with multiple return values.
int return_count = 1;
     translation_array_builder_.BeginInterpretedFrame(
-        deopt_info->state.bytecode_position, kFunctionLiteralIndex,
-        compilation_unit.register_count(), return_offset, return_count);
+        deopt_info->state.bytecode_position,
+        GetDeoptLiteral(*unit.shared_function_info().object()),
+        unit.register_count(), return_offset, return_count);
-    EmitDeoptFrameValues(compilation_unit, deopt_info->state.register_frame,
+    EmitDeoptFrameValues(unit, deopt_info->state.register_frame,
deopt_info->input_locations,
deopt_info->result_location);
}
......@@ -500,15 +522,20 @@ class MaglevCodeGeneratorImpl final {
code_gen_state_.GetFramePointerOffsetForStackSlot(operand));
}
-  void EmitDeoptFrameValues(
+  const InputLocation* EmitDeoptFrameValues(
const MaglevCompilationUnit& compilation_unit,
const CompactInterpreterFrameState* checkpoint_state,
const InputLocation* input_locations,
interpreter::Register result_location) {
// Closure
if (compilation_unit.inlining_depth() == 0) {
int closure_index = DeoptStackSlotIndexFromFPOffset(
StandardFrameConstants::kFunctionOffset);
translation_array_builder_.StoreStackSlot(closure_index);
} else {
translation_array_builder_.StoreLiteral(
GetDeoptLiteral(*compilation_unit.function().object()));
}
// TODO(leszeks): The input locations array happens to be in the same order
// as parameters+locals+accumulator are accessed here. We should make this
......@@ -573,6 +600,8 @@ class MaglevCodeGeneratorImpl final {
translation_array_builder_.StoreLiteral(kOptimizedOutConstantIndex);
}
}
return input_location;
}
void EmitMetadata() {
......@@ -610,6 +639,7 @@ class MaglevCodeGeneratorImpl final {
translation_array_builder_.ToTranslationArray(isolate()->factory());
data->SetTranslationByteArray(*translation_array);
// TODO(leszeks): Fix with the real inlined function count.
data->SetInlinedFunctionCount(Smi::zero());
// TODO(leszeks): Support optimization IDs
data->SetOptimizationId(Smi::zero());
......@@ -624,18 +654,17 @@ class MaglevCodeGeneratorImpl final {
->shared_function_info()
.object());
     // TODO(leszeks): Proper literals array.
     Handle<DeoptimizationLiteralArray> literals =
-        isolate()->factory()->NewDeoptimizationLiteralArray(2);
-    literals->set(kFunctionLiteralIndex, *code_gen_state_.compilation_info()
-                                              ->toplevel_compilation_unit()
-                                              ->shared_function_info()
-                                              .object());
-    literals->set(kOptimizedOutConstantIndex,
-                  ReadOnlyRoots(isolate()).optimized_out());
+        isolate()->factory()->NewDeoptimizationLiteralArray(
+            deopt_literals_.size());
+    IdentityMap<int, base::DefaultAllocationPolicy>::IteratableScope iterate(
+        &deopt_literals_);
+    for (auto it = iterate.begin(); it != iterate.end(); ++it) {
+      literals->set(*it.entry(), it.key());
+    }
data->SetLiteralArray(*literals);
-    // TODO(leszeks): Fix once we have inlining.
+    // TODO(leszeks): Fix with the real inlining positions.
Handle<PodArray<InliningPosition>> inlining_positions =
PodArray<InliningPosition>::New(isolate(), 0);
data->SetInliningPositions(*inlining_positions);
......@@ -687,11 +716,21 @@ class MaglevCodeGeneratorImpl final {
return &translation_array_builder_;
}
int GetDeoptLiteral(Object obj) {
IdentityMapFindResult<int> res = deopt_literals_.FindOrInsert(obj);
if (!res.already_exists) {
DCHECK_EQ(0, *res.entry);
*res.entry = deopt_literals_.size() - 1;
}
return *res.entry;
}
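
GetDeoptLiteral deduplicates literals through the IdentityMap: the first
FindOrInsert for an object returns a zero-initialised entry, which is then
assigned the next free index. A minimal usage sketch (hypothetical local
names, behaviour as implied by the code above):

    // First request inserts the object and assigns the next index.
    int a = GetDeoptLiteral(ReadOnlyRoots(isolate()).optimized_out());  // 0
    // Repeated requests find the existing entry and return the same index.
    int b = GetDeoptLiteral(ReadOnlyRoots(isolate()).optimized_out());  // 0
    // A different object gets the following index.
    int c = GetDeoptLiteral(*some_shared_function_info.object());       // 1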
SafepointTableBuilder safepoint_table_builder_;
TranslationArrayBuilder translation_array_builder_;
MaglevCodeGenState code_gen_state_;
GraphProcessor<MaglevCodeGeneratingNodeProcessor> processor_;
Graph* const graph_;
IdentityMap<int, base::DefaultAllocationPolicy> deopt_literals_;
int deopt_exit_start_offset_ = -1;
};
......
......@@ -13,9 +13,11 @@ namespace v8 {
namespace internal {
namespace maglev {
-MaglevCompilationUnit::MaglevCompilationUnit(MaglevCompilationInfo* info,
-                                             Handle<JSFunction> function)
+MaglevCompilationUnit::MaglevCompilationUnit(
+    MaglevCompilationInfo* info, const MaglevCompilationUnit* caller,
+    Handle<JSFunction> function)
: info_(info),
caller_(caller),
function_(MakeRef(broker(), function)),
shared_function_info_(function_.shared()),
bytecode_(shared_function_info_.GetBytecodeArray()),
......@@ -24,7 +26,8 @@ MaglevCompilationUnit::MaglevCompilationUnit(MaglevCompilationInfo* info,
bytecode_analysis_(bytecode_.object(), zone(), BytecodeOffset::None(),
true),
register_count_(bytecode_.register_count()),
-      parameter_count_(bytecode_.parameter_count()) {}
+      parameter_count_(bytecode_.parameter_count()),
+      inlining_depth_(caller == nullptr ? 0 : caller->inlining_depth_ + 1) {}
compiler::JSHeapBroker* MaglevCompilationUnit::broker() const {
return info_->broker();
......
......@@ -24,18 +24,27 @@ class MaglevCompilationUnit : public ZoneObject {
public:
static MaglevCompilationUnit* New(Zone* zone, MaglevCompilationInfo* info,
Handle<JSFunction> function) {
-    return zone->New<MaglevCompilationUnit>(info, function);
+    return zone->New<MaglevCompilationUnit>(info, nullptr, function);
}
static MaglevCompilationUnit* NewInner(Zone* zone,
const MaglevCompilationUnit* caller,
Handle<JSFunction> function) {
return zone->New<MaglevCompilationUnit>(caller->info(), caller, function);
}
MaglevCompilationUnit(MaglevCompilationInfo* info,
const MaglevCompilationUnit* caller,
Handle<JSFunction> function);
MaglevCompilationInfo* info() const { return info_; }
const MaglevCompilationUnit* caller() const { return caller_; }
compiler::JSHeapBroker* broker() const;
Isolate* isolate() const;
LocalIsolate* local_isolate() const;
Zone* zone() const;
int register_count() const { return register_count_; }
int parameter_count() const { return parameter_count_; }
int inlining_depth() const { return inlining_depth_; }
bool has_graph_labeller() const;
MaglevGraphLabeller* graph_labeller() const;
const compiler::SharedFunctionInfoRef& shared_function_info() const {
......@@ -52,6 +61,7 @@ class MaglevCompilationUnit : public ZoneObject {
private:
MaglevCompilationInfo* const info_;
const MaglevCompilationUnit* const caller_;
const compiler::JSFunctionRef function_;
const compiler::SharedFunctionInfoRef shared_function_info_;
const compiler::BytecodeArrayRef bytecode_;
......@@ -59,6 +69,7 @@ class MaglevCompilationUnit : public ZoneObject {
const compiler::BytecodeAnalysis bytecode_analysis_;
const int register_count_;
const int parameter_count_;
const int inlining_depth_;
};
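
Taken together, the two factories are used roughly like this (a sketch with
hypothetical locals; `zone`, `info`, `fn` and `callee` are assumptions):

    // Top-level unit: no caller, inlining_depth() == 0.
    MaglevCompilationUnit* outer = MaglevCompilationUnit::New(zone, info, fn);
    // Inlined unit: shares the caller's info, inlining_depth() == 1.
    MaglevCompilationUnit* inner =
        MaglevCompilationUnit::NewInner(zone, outer, callee);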
} // namespace maglev
......
......@@ -108,31 +108,39 @@ class UseMarkingProcessor {
}
private:
-  void MarkCheckpointNodes(NodeBase* node, const EagerDeoptInfo* deopt_info,
-                           const ProcessingState& state) {
-    const MaglevCompilationUnit& compilation_unit =
-        *state.compilation_info()->toplevel_compilation_unit();
+  void MarkCheckpointNodes(NodeBase* node, const MaglevCompilationUnit& unit,
+                           const CheckpointedInterpreterState* checkpoint_state,
+                           InputLocation* input_locations,
+                           const ProcessingState& state, int& index) {
+    if (checkpoint_state->parent) {
+      MarkCheckpointNodes(node, *unit.caller(), checkpoint_state->parent,
+                          input_locations, state, index);
+    }
     const CompactInterpreterFrameState* register_frame =
-        deopt_info->state.register_frame;
+        checkpoint_state->register_frame;
     int use_id = node->id();
-    int index = 0;
     register_frame->ForEachValue(
-        compilation_unit, [&](ValueNode* node, interpreter::Register reg) {
-          node->mark_use(use_id, &deopt_info->input_locations[index++]);
+        unit, [&](ValueNode* node, interpreter::Register reg) {
+          node->mark_use(use_id, &input_locations[index++]);
         });
   }
+  void MarkCheckpointNodes(NodeBase* node, const EagerDeoptInfo* deopt_info,
+                           const ProcessingState& state) {
+    int index = 0;
+    MarkCheckpointNodes(node, deopt_info->unit, &deopt_info->state,
+                        deopt_info->input_locations, state, index);
+  }
   void MarkCheckpointNodes(NodeBase* node, const LazyDeoptInfo* deopt_info,
                            const ProcessingState& state) {
-    const MaglevCompilationUnit& compilation_unit =
-        *state.compilation_info()->toplevel_compilation_unit();
     const CompactInterpreterFrameState* register_frame =
         deopt_info->state.register_frame;
     int use_id = node->id();
     int index = 0;
     register_frame->ForEachValue(
-        compilation_unit, [&](ValueNode* node, interpreter::Register reg) {
+        deopt_info->unit, [&](ValueNode* node, interpreter::Register reg) {
// Skip over the result location.
if (reg == deopt_info->result_location) return;
node->mark_use(use_id, &deopt_info->input_locations[index++]);
......
......@@ -38,14 +38,18 @@ int LoadSimpleFieldHandler(FieldIndex field_index) {
MaglevGraphBuilder::MaglevGraphBuilder(LocalIsolate* local_isolate,
MaglevCompilationUnit* compilation_unit,
-                                       Graph* graph)
+                                       Graph* graph, MaglevGraphBuilder* parent)
     : local_isolate_(local_isolate),
       compilation_unit_(compilation_unit),
+      parent_(parent),
       graph_(graph),
       iterator_(bytecode().object()),
-      jump_targets_(zone()->NewArray<BasicBlockRef>(bytecode().length())),
+      // Add an extra jump_target slot for the inline exit if needed.
+      jump_targets_(zone()->NewArray<BasicBlockRef>(bytecode().length() +
+                                                    (is_inline() ? 1 : 0))),
       // Overallocate merge_states_ by one to allow always looking up the
-      // next offset.
+      // next offset. This overallocated slot can also be used for the inline
+      // exit when needed.
merge_states_(zone()->NewArray<MergePointInterpreterFrameState*>(
bytecode().length() + 1)),
current_interpreter_frame_(*compilation_unit_) {
......@@ -57,6 +61,16 @@ MaglevGraphBuilder::MaglevGraphBuilder(LocalIsolate* local_isolate,
new (&jump_targets_[i]) BasicBlockRef();
}
if (is_inline()) {
DCHECK_NOT_NULL(parent_);
DCHECK_GT(compilation_unit->inlining_depth(), 0);
// The allocation/initialisation logic here relies on inline_exit_offset
// being the offset one past the end of the bytecode.
DCHECK_EQ(inline_exit_offset(), bytecode().length());
merge_states_[inline_exit_offset()] = nullptr;
new (&jump_targets_[inline_exit_offset()]) BasicBlockRef();
}
CalculatePredecessorCounts();
for (auto& offset_and_info : bytecode_analysis().GetLoopInfos()) {
......@@ -70,15 +84,25 @@ MaglevGraphBuilder::MaglevGraphBuilder(LocalIsolate* local_isolate,
*compilation_unit_, offset, NumPredecessors(offset), liveness,
&loop_info);
}
   }
+}
+void MaglevGraphBuilder::StartPrologue() {
   current_block_ = zone()->New<BasicBlock>(nullptr);
   block_offset_ = -1;
+}
-  for (int i = 0; i < parameter_count(); i++) {
+BasicBlock* MaglevGraphBuilder::EndPrologue() {
+  BasicBlock* first_block = CreateBlock<Jump>({}, &jump_targets_[0]);
+  MergeIntoFrameState(first_block, 0);
+  return first_block;
+}
+void MaglevGraphBuilder::SetArgument(int i, ValueNode* value) {
   interpreter::Register reg = interpreter::Register::FromParameterIndex(i);
-    current_interpreter_frame_.set(reg, AddNewNode<InitialValue>({}, reg));
-  }
+  current_interpreter_frame_.set(reg, value);
+}
+void MaglevGraphBuilder::BuildRegisterFrameInitialization() {
// TODO(leszeks): Extract out a separate "incoming context/closure" nodes,
// to be able to read in the machine register but also use the frame-spilled
// slot.
......@@ -109,9 +133,6 @@ MaglevGraphBuilder::MaglevGraphBuilder(LocalIsolate* local_isolate,
for (; register_index < register_count(); register_index++) {
StoreRegister(interpreter::Register(register_index), undefined_value);
}
-  BasicBlock* first_block = CreateBlock<Jump>({}, &jump_targets_[0]);
-  MergeIntoFrameState(first_block, 0);
}
// TODO(v8:7700): Clean up after all bytecodes are supported.
......@@ -596,6 +617,102 @@ MAGLEV_UNIMPLEMENTED_BYTECODE(DeletePropertyStrict)
MAGLEV_UNIMPLEMENTED_BYTECODE(DeletePropertySloppy)
MAGLEV_UNIMPLEMENTED_BYTECODE(GetSuperConstructor)
void MaglevGraphBuilder::InlineCallFromRegisters(
int argc_count, ConvertReceiverMode receiver_mode,
compiler::JSFunctionRef function) {
// The undefined constant node has to be created before the inner graph is
// created.
RootConstant* undefined_constant;
if (receiver_mode == ConvertReceiverMode::kNullOrUndefined) {
undefined_constant =
AddNewNode<RootConstant>({}, RootIndex::kUndefinedValue);
}
// Create a new compilation unit and graph builder for the inlined
// function.
MaglevCompilationUnit* inner_unit = MaglevCompilationUnit::NewInner(
zone(), compilation_unit_, function.object());
MaglevGraphBuilder inner_graph_builder(local_isolate_, inner_unit, graph_,
this);
// Finish the current block with a jump to the inlined function.
BasicBlockRef start_ref, end_ref;
BasicBlock* block = CreateBlock<JumpToInlined>({}, &start_ref, inner_unit);
ResolveJumpsToBlockAtOffset(block, block_offset_);
// Manually create the prologue of the inner function graph, so that we
// can manually set up the arguments.
inner_graph_builder.StartPrologue();
int arg_index = 0;
int reg_count;
if (receiver_mode == ConvertReceiverMode::kNullOrUndefined) {
reg_count = argc_count;
if (function.shared().language_mode() == LanguageMode::kSloppy) {
// TODO(leszeks): Store the global proxy somehow.
inner_graph_builder.SetArgument(arg_index++, undefined_constant);
} else {
inner_graph_builder.SetArgument(arg_index++, undefined_constant);
}
} else {
reg_count = argc_count + 1;
}
for (int i = 0; i < reg_count && i < inner_unit->parameter_count(); i++) {
inner_graph_builder.SetArgument(arg_index++, LoadRegisterTagged(i + 1));
}
for (; arg_index < inner_unit->parameter_count(); arg_index++) {
inner_graph_builder.SetArgument(arg_index, undefined_constant);
}
// TODO(leszeks): Also correctly set up the closure and context slots, instead
// of using InitialValue.
inner_graph_builder.BuildRegisterFrameInitialization();
BasicBlock* inlined_prologue = inner_graph_builder.EndPrologue();
// Set the entry JumpToInlined to jump to the prologue block.
// TODO(leszeks): Passing start_ref to JumpToInlined creates a two-element
// linked list of refs. Consider adding a helper to explicitly set the target
// instead.
start_ref.SetToBlockAndReturnNext(inlined_prologue)
->SetToBlockAndReturnNext(inlined_prologue);
// Build the inlined function body.
inner_graph_builder.BuildBody();
// All returns in the inlined body jump to a merge point one past the
// bytecode length (i.e. at offset bytecode.length()). Create a block at
// this fake offset and have it jump out of the inlined function, into a new
// block that we create which resumes execution of the outer function.
// TODO(leszeks): Wrap this up in a helper.
DCHECK_NULL(inner_graph_builder.current_block_);
inner_graph_builder.ProcessMergePoint(
inner_graph_builder.inline_exit_offset());
inner_graph_builder.StartNewBlock(inner_graph_builder.inline_exit_offset());
BasicBlock* end_block =
inner_graph_builder.CreateBlock<JumpFromInlined>({}, &end_ref);
inner_graph_builder.ResolveJumpsToBlockAtOffset(
end_block, inner_graph_builder.inline_exit_offset());
// Pull the returned accumulator value out of the inlined function's final
// merged return state.
current_interpreter_frame_.set_accumulator(
inner_graph_builder.current_interpreter_frame_.accumulator());
// Create a new block at our current offset, and resume execution. Do this
// manually to avoid trying to resolve any merges to this offset, which will
// have already been processed on entry to this visitor.
current_block_ =
zone()->New<BasicBlock>(zone()->New<MergePointInterpreterFrameState>(
*compilation_unit_, current_interpreter_frame_,
iterator_.current_offset(), 1, block, GetInLiveness()));
block_offset_ = iterator_.current_offset();
// Set the exit JumpFromInlined to jump to this resume block.
// TODO(leszeks): Passing start_ref to JumpFromInlined creates a two-element
// linked list of refs. Consider adding a helper to explicitly set the target
// instead.
end_ref.SetToBlockAndReturnNext(current_block_)
->SetToBlockAndReturnNext(current_block_);
}
// TODO(v8:7700): Read feedback and implement inlining
void MaglevGraphBuilder::BuildCallFromRegisterList(
ConvertReceiverMode receiver_mode) {
......@@ -628,9 +745,52 @@ void MaglevGraphBuilder::BuildCallFromRegisterList(
void MaglevGraphBuilder::BuildCallFromRegisters(
int argc_count, ConvertReceiverMode receiver_mode) {
// Indices and counts of operands on the bytecode.
const int kFirstArgumentOperandIndex = 1;
const int kReceiverOperandCount =
(receiver_mode == ConvertReceiverMode::kNullOrUndefined) ? 0 : 1;
const int kReceiverAndArgOperandCount = kReceiverOperandCount + argc_count;
const int kSlotOperandIndex =
kFirstArgumentOperandIndex + kReceiverAndArgOperandCount;
DCHECK_LE(argc_count, 2);
ValueNode* function = LoadRegisterTagged(0);
ValueNode* context = GetContext();
FeedbackSlot slot = GetSlotOperand(kSlotOperandIndex);
const compiler::ProcessedFeedback& processed_feedback =
broker()->GetFeedbackForCall(compiler::FeedbackSource(feedback(), slot));
switch (processed_feedback.kind()) {
case compiler::ProcessedFeedback::kInsufficient:
EmitUnconditionalDeopt();
return;
case compiler::ProcessedFeedback::kCall: {
if (!FLAG_maglev_inlining) break;
const compiler::CallFeedback& call_feedback = processed_feedback.AsCall();
CallFeedbackContent content = call_feedback.call_feedback_content();
if (content != CallFeedbackContent::kTarget) break;
base::Optional<compiler::HeapObjectRef> maybe_target =
call_feedback.target();
if (!maybe_target.has_value()) break;
compiler::HeapObjectRef target = maybe_target.value();
if (!target.IsJSFunction()) break;
compiler::JSFunctionRef function = target.AsJSFunction();
base::Optional<compiler::FeedbackVectorRef> maybe_feedback_vector =
function.feedback_vector(broker()->dependencies());
if (!maybe_feedback_vector.has_value()) break;
return InlineCallFromRegisters(argc_count, receiver_mode, function);
}
default:
break;
}
// On fallthrough, create a generic call.
int argc_count_with_recv = argc_count + 1;
size_t input_count = argc_count_with_recv + Call::kFixedInputCount;
......@@ -795,6 +955,31 @@ void MaglevGraphBuilder::MergeIntoFrameState(BasicBlock* predecessor,
}
}
void MaglevGraphBuilder::MergeIntoInlinedReturnFrameState(
BasicBlock* predecessor) {
int target = inline_exit_offset();
if (merge_states_[target] == nullptr) {
// All returns should have the same liveness, which is that only the
// accumulator is live.
const compiler::BytecodeLivenessState* liveness =
bytecode_analysis().GetInLivenessFor(iterator_.current_offset());
DCHECK(liveness->AccumulatorIsLive());
DCHECK_EQ(liveness->live_value_count(), 1);
// If there's no target frame state, allocate a new one.
merge_states_[target] = zone()->New<MergePointInterpreterFrameState>(
*compilation_unit_, current_interpreter_frame_, target,
NumPredecessors(target), predecessor, liveness);
} else {
// Again, all returns should have the same liveness, so double check this.
DCHECK(bytecode_analysis()
.GetInLivenessFor(iterator_.current_offset())
->Equals(*merge_states_[target]->frame_state().liveness()));
merge_states_[target]->Merge(*compilation_unit_, current_interpreter_frame_,
predecessor, target);
}
}
void MaglevGraphBuilder::BuildBranchIfTrue(ValueNode* node, int true_target,
int false_target) {
BasicBlock* block = FinishBlock<BranchIfTrue>(next_offset(), {node},
......@@ -842,7 +1027,19 @@ MAGLEV_UNIMPLEMENTED_BYTECODE(SetPendingMessage)
MAGLEV_UNIMPLEMENTED_BYTECODE(Throw)
MAGLEV_UNIMPLEMENTED_BYTECODE(ReThrow)
void MaglevGraphBuilder::VisitReturn() {
if (!is_inline()) {
FinishBlock<Return>(next_offset(), {GetAccumulatorTagged()});
return;
}
// All inlined function returns instead jump to one past the end of the
// bytecode, where we'll later create a final basic block which resumes
// execution of the caller.
// TODO(leszeks): Consider shortcutting this Jump for cases where there is
// only one return and no need to merge return states.
BasicBlock* block = FinishBlock<Jump>(next_offset(), {},
&jump_targets_[inline_exit_offset()]);
MergeIntoInlinedReturnFrameState(block);
}
MAGLEV_UNIMPLEMENTED_BYTECODE(ThrowReferenceErrorIfHole)
MAGLEV_UNIMPLEMENTED_BYTECODE(ThrowSuperNotCalledIfHole)
......
......@@ -27,9 +27,28 @@ class MaglevGraphBuilder {
public:
explicit MaglevGraphBuilder(LocalIsolate* local_isolate,
MaglevCompilationUnit* compilation_unit,
-                              Graph* graph);
+                              Graph* graph,
+                              MaglevGraphBuilder* parent = nullptr);
void Build() {
DCHECK(!is_inline());
StartPrologue();
for (int i = 0; i < parameter_count(); i++) {
SetArgument(i, AddNewNode<InitialValue>(
{}, interpreter::Register::FromParameterIndex(i)));
}
BuildRegisterFrameInitialization();
EndPrologue();
BuildBody();
}
void StartPrologue();
void SetArgument(int i, ValueNode* value);
void BuildRegisterFrameInitialization();
BasicBlock* EndPrologue();
void BuildBody() {
for (iterator_.Reset(); !iterator_.done(); iterator_.Advance()) {
VisitSingleBytecode();
// TODO(v8:7700): Clean up after all bytecodes are supported.
......@@ -343,7 +362,13 @@ class MaglevGraphBuilder {
latest_checkpointed_state_.emplace(
BytecodeOffset(iterator_.current_offset()),
zone()->New<CompactInterpreterFrameState>(
-              *compilation_unit_, GetInLiveness(), current_interpreter_frame_));
+              *compilation_unit_, GetInLiveness(), current_interpreter_frame_),
+          parent_ == nullptr
+              ? nullptr
+              // TODO(leszeks): Don't always allocate for the parent state,
+              // maybe cache it on the graph builder?
+              : zone()->New<CheckpointedInterpreterState>(
+                    parent_->GetLatestCheckpointedState()));
}
return *latest_checkpointed_state_;
}
......@@ -352,7 +377,9 @@ class MaglevGraphBuilder {
return CheckpointedInterpreterState(
BytecodeOffset(iterator_.current_offset()),
zone()->New<CompactInterpreterFrameState>(
-              *compilation_unit_, GetOutLiveness(), current_interpreter_frame_));
+              *compilation_unit_, GetOutLiveness(), current_interpreter_frame_),
+          // TODO(leszeks): Support lazy deopts in inlined functions.
+          nullptr);
}
template <typename NodeT>
......@@ -444,6 +471,10 @@ class MaglevGraphBuilder {
return block;
}
void InlineCallFromRegisters(int argc_count,
ConvertReceiverMode receiver_mode,
compiler::JSFunctionRef function);
void BuildCallFromRegisterList(ConvertReceiverMode receiver_mode);
void BuildCallFromRegisters(int argc_count,
ConvertReceiverMode receiver_mode);
......@@ -465,6 +496,7 @@ class MaglevGraphBuilder {
void VisitBinarySmiOperation();
void MergeIntoFrameState(BasicBlock* block, int target);
void MergeIntoInlinedReturnFrameState(BasicBlock* block);
void BuildBranchIfTrue(ValueNode* node, int true_target, int false_target);
void BuildBranchIfToBooleanTrue(ValueNode* node, int true_target,
int false_target);
......@@ -491,12 +523,18 @@ class MaglevGraphBuilder {
} else if (interpreter::Bytecodes::Returns(bytecode) ||
interpreter::Bytecodes::UnconditionallyThrows(bytecode)) {
predecessors_[iterator.next_offset()]--;
// Collect inline return jumps in the slot after the last bytecode.
if (is_inline() && interpreter::Bytecodes::Returns(bytecode)) {
predecessors_[array_length - 1]++;
}
}
// TODO(leszeks): Also consider handler entries (the bytecode analysis)
// will do this automatically I guess if we merge this into that.
}
if (!is_inline()) {
DCHECK_EQ(0, predecessors_[bytecode().length()]);
}
}
int NumPredecessors(int offset) { return predecessors_[offset]; }
......@@ -530,8 +568,19 @@ class MaglevGraphBuilder {
return compilation_unit_->graph_labeller();
}
// True when this graph builder is building the subgraph of an inlined
// function.
bool is_inline() const { return parent_ != nullptr; }
// The fake offset used as a target for all exits of an inlined function.
int inline_exit_offset() const {
DCHECK(is_inline());
return bytecode().length();
}
LocalIsolate* const local_isolate_;
MaglevCompilationUnit* const compilation_unit_;
MaglevGraphBuilder* const parent_;
Graph* const graph_;
interpreter::BytecodeArrayIterator iterator_;
uint32_t* predecessors_;
......
......@@ -314,10 +314,8 @@ void PrintEagerDeopt(std::ostream& os, std::vector<BasicBlock*> targets,
os << " ↱ eager @" << deopt_info->state.bytecode_position << " : {";
bool first = true;
int index = 0;
-  const MaglevCompilationUnit& compilation_unit =
-      *state.compilation_info()->toplevel_compilation_unit();
   deopt_info->state.register_frame->ForEachValue(
-      compilation_unit, [&](ValueNode* node, interpreter::Register reg) {
+      deopt_info->unit, [&](ValueNode* node, interpreter::Register reg) {
if (first) {
first = false;
} else {
......@@ -355,10 +353,8 @@ void PrintLazyDeopt(std::ostream& os, std::vector<BasicBlock*> targets,
os << " ↳ lazy @" << deopt_info->state.bytecode_position << " : {";
bool first = true;
int index = 0;
-  const MaglevCompilationUnit& compilation_unit =
-      *state.compilation_info()->toplevel_compilation_unit();
   deopt_info->state.register_frame->ForEachValue(
-      compilation_unit, [&](ValueNode* node, interpreter::Register reg) {
+      deopt_info->unit, [&](ValueNode* node, interpreter::Register reg) {
if (first) {
first = false;
} else {
......
......@@ -70,6 +70,8 @@ class MaglevGraphVerifier {
case Opcode::kDeopt:
case Opcode::kJump:
case Opcode::kJumpLoop:
case Opcode::kJumpToInlined:
case Opcode::kJumpFromInlined:
// No input.
DCHECK_EQ(node->input_count(), 0);
break;
......
......@@ -336,15 +336,32 @@ void NodeBase::Print(std::ostream& os,
UNREACHABLE();
}
namespace {
size_t GetInputLocationsArraySize(const MaglevCompilationUnit& compilation_unit,
const CheckpointedInterpreterState& state) {
size_t size = state.register_frame->size(compilation_unit);
const CheckpointedInterpreterState* parent = state.parent;
const MaglevCompilationUnit* parent_unit = compilation_unit.caller();
while (parent != nullptr) {
size += parent->register_frame->size(*parent_unit);
parent = parent->parent;
parent_unit = parent_unit->caller();
}
return size;
}
} // namespace
DeoptInfo::DeoptInfo(Zone* zone, const MaglevCompilationUnit& compilation_unit,
CheckpointedInterpreterState state)
-    : state(state),
+    : unit(compilation_unit),
+      state(state),
       input_locations(zone->NewArray<InputLocation>(
-          state.register_frame->size(compilation_unit))) {
+          GetInputLocationsArraySize(compilation_unit, state))) {
   // Default initialise if we're printing the graph, to avoid printing junk
   // values.
   if (FLAG_print_maglev_graph) {
-    for (size_t i = 0; i < state.register_frame->size(compilation_unit); ++i) {
+    for (size_t i = 0; i < GetInputLocationsArraySize(compilation_unit, state);
+         ++i) {
new (&input_locations[i]) InputLocation();
}
}
......@@ -894,6 +911,30 @@ void Jump::GenerateCode(MaglevCodeGenState* code_gen_state,
}
}
void JumpToInlined::AllocateVreg(MaglevVregAllocationState* vreg_state,
const ProcessingState& state) {}
void JumpToInlined::GenerateCode(MaglevCodeGenState* code_gen_state,
const ProcessingState& state) {
// Avoid emitting a jump to the next block.
if (target() != state.next_block()) {
__ jmp(target()->label());
}
}
void JumpToInlined::PrintParams(std::ostream& os,
MaglevGraphLabeller* graph_labeller) const {
os << "(" << Brief(*unit()->shared_function_info().object()) << ")";
}
void JumpFromInlined::AllocateVreg(MaglevVregAllocationState* vreg_state,
const ProcessingState& state) {}
void JumpFromInlined::GenerateCode(MaglevCodeGenState* code_gen_state,
const ProcessingState& state) {
// Avoid emitting a jump to the next block.
if (target() != state.next_block()) {
__ jmp(target()->label());
}
}
void JumpLoop::AllocateVreg(MaglevVregAllocationState* vreg_state,
const ProcessingState& state) {}
void JumpLoop::GenerateCode(MaglevCodeGenState* code_gen_state,
......
......@@ -94,7 +94,9 @@ class CompactInterpreterFrameState;
#define UNCONDITIONAL_CONTROL_NODE_LIST(V) \
V(Jump) \
-  V(JumpLoop)
+  V(JumpLoop)                              \
+  V(JumpToInlined)                         \
+  V(JumpFromInlined)
#define CONTROL_NODE_LIST(V) \
V(Return) \
......@@ -328,11 +330,15 @@ class CheckpointedInterpreterState {
public:
CheckpointedInterpreterState() = default;
CheckpointedInterpreterState(BytecodeOffset bytecode_position,
-                               const CompactInterpreterFrameState* state)
-      : bytecode_position(bytecode_position), register_frame(state) {}
+                               const CompactInterpreterFrameState* state,
+                               const CheckpointedInterpreterState* parent)
+      : bytecode_position(bytecode_position),
+        register_frame(state),
+        parent(parent) {}
BytecodeOffset bytecode_position = BytecodeOffset::None();
const CompactInterpreterFrameState* register_frame = nullptr;
const CheckpointedInterpreterState* parent = nullptr;
};
class DeoptInfo {
......@@ -341,6 +347,7 @@ class DeoptInfo {
CheckpointedInterpreterState checkpoint);
public:
const MaglevCompilationUnit& unit;
CheckpointedInterpreterState state;
InputLocation* input_locations = nullptr;
Label deopt_entry_label;
......@@ -1464,9 +1471,9 @@ class ControlNode : public NodeBase {
return next_post_dominating_hole_;
}
void set_next_post_dominating_hole(ControlNode* node) {
-    DCHECK_IMPLIES(node != nullptr, node->Is<Jump>() || node->Is<Return>() ||
-                                        node->Is<Deopt>() ||
-                                        node->Is<JumpLoop>());
+    DCHECK_IMPLIES(node != nullptr, node->Is<UnconditionalControlNode>() ||
+                                        node->Is<Return>() ||
+                                        node->Is<Deopt>());
next_post_dominating_hole_ = node;
}
......@@ -1590,6 +1597,36 @@ class JumpLoop : public UnconditionalControlNodeT<JumpLoop> {
void PrintParams(std::ostream&, MaglevGraphLabeller*) const {}
};
class JumpToInlined : public UnconditionalControlNodeT<JumpToInlined> {
using Base = UnconditionalControlNodeT<JumpToInlined>;
public:
explicit JumpToInlined(uint32_t bitfield, BasicBlockRef* target_refs,
MaglevCompilationUnit* unit)
: Base(bitfield, target_refs), unit_(unit) {}
void AllocateVreg(MaglevVregAllocationState*, const ProcessingState&);
void GenerateCode(MaglevCodeGenState*, const ProcessingState&);
void PrintParams(std::ostream&, MaglevGraphLabeller*) const;
const MaglevCompilationUnit* unit() const { return unit_; }
private:
MaglevCompilationUnit* unit_;
};
class JumpFromInlined : public UnconditionalControlNodeT<JumpFromInlined> {
using Base = UnconditionalControlNodeT<JumpFromInlined>;
public:
explicit JumpFromInlined(uint32_t bitfield, BasicBlockRef* target_refs)
: Base(bitfield, target_refs) {}
void AllocateVreg(MaglevVregAllocationState*, const ProcessingState&);
void GenerateCode(MaglevCodeGenState*, const ProcessingState&);
void PrintParams(std::ostream&, MaglevGraphLabeller*) const {}
};
class Return : public ControlNode {
public:
explicit Return(uint32_t bitfield) : ControlNode(bitfield) {
......
......@@ -339,28 +339,18 @@ void StraightForwardRegisterAllocator::UpdateUse(
void StraightForwardRegisterAllocator::UpdateUse(
const EagerDeoptInfo& deopt_info) {
-  const CompactInterpreterFrameState* checkpoint_state =
-      deopt_info.state.register_frame;
-  const MaglevCompilationUnit& compilation_unit =
-      *compilation_info_->toplevel_compilation_unit();
   int index = 0;
-  checkpoint_state->ForEachValue(
-      compilation_unit, [&](ValueNode* node, interpreter::Register reg) {
-        InputLocation* input = &deopt_info.input_locations[index++];
-        input->InjectAllocated(node->allocation());
-        UpdateUse(node, input);
-      });
+  UpdateUse(deopt_info.unit, &deopt_info.state, deopt_info.input_locations,
+            index);
}
void StraightForwardRegisterAllocator::UpdateUse(
const LazyDeoptInfo& deopt_info) {
const CompactInterpreterFrameState* checkpoint_state =
deopt_info.state.register_frame;
-  const MaglevCompilationUnit& compilation_unit =
-      *compilation_info_->toplevel_compilation_unit();
   int index = 0;
   checkpoint_state->ForEachValue(
-      compilation_unit, [&](ValueNode* node, interpreter::Register reg) {
+      deopt_info.unit, [&](ValueNode* node, interpreter::Register reg) {
// Skip over the result location.
if (reg == deopt_info.result_location) return;
InputLocation* input = &deopt_info.input_locations[index++];
......@@ -369,6 +359,22 @@ void StraightForwardRegisterAllocator::UpdateUse(
});
}
void StraightForwardRegisterAllocator::UpdateUse(
const MaglevCompilationUnit& unit,
const CheckpointedInterpreterState* state, InputLocation* input_locations,
int& index) {
if (state->parent) {
UpdateUse(*unit.caller(), state->parent, input_locations, index);
}
const CompactInterpreterFrameState* checkpoint_state = state->register_frame;
checkpoint_state->ForEachValue(
unit, [&](ValueNode* node, interpreter::Register reg) {
InputLocation* input = &input_locations[index++];
input->InjectAllocated(node->allocation());
UpdateUse(node, input);
});
}
void StraightForwardRegisterAllocator::AllocateNode(Node* node) {
for (Input& input : *node) AssignInput(input);
AssignTemporaries(node);
......@@ -546,7 +552,10 @@ void StraightForwardRegisterAllocator::AllocateControlNode(ControlNode* node,
// Merge register values. Values only flowing into phis and not being
// independently live will be killed as part of the merge.
-  if (auto unconditional = node->TryCast<UnconditionalControlNode>()) {
+  if (node->Is<JumpToInlined>()) {
+    // Do nothing.
+    // TODO(leszeks): DCHECK any useful invariants here.
+  } else if (auto unconditional = node->TryCast<UnconditionalControlNode>()) {
// Empty blocks are immediately merged at the control of their predecessor.
if (!block->is_empty_block()) {
MergeRegisterValues(unconditional, unconditional->target(),
......
......@@ -62,6 +62,9 @@ class StraightForwardRegisterAllocator {
void UpdateUse(ValueNode* node, InputLocation* input_location);
void UpdateUse(const EagerDeoptInfo& deopt_info);
void UpdateUse(const LazyDeoptInfo& deopt_info);
void UpdateUse(const MaglevCompilationUnit& unit,
const CheckpointedInterpreterState* state,
InputLocation* input_locations, int& index);
void AllocateControlNode(ControlNode* node, BasicBlock* block);
void AllocateNode(Node* node);
......
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --maglev --maglev-inlining --no-stress-opt
function inner(o) {
"use strict"
return 10 + o.x + 100;
}
function foo(o) {
return 1000 + inner(o) + 10000;
}
%PrepareFunctionForOptimization(inner);
%PrepareFunctionForOptimization(foo);
assertEquals(11111, foo({x:1}));
assertEquals(11111, foo({x:1}));
%OptimizeMaglevOnNextCall(foo);
// The inlined inner function will deopt -- this deopt should succeed.
assertEquals(11111, foo({y:2,x:1}));
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --maglev --maglev-inlining --no-stress-opt
function global_func(x) {
return x;
}
function foo(x) {
return global_func(x);
}
%PrepareFunctionForOptimization(foo);
%PrepareFunctionForOptimization(global_func);
print(foo(1));
print(foo(1));
%OptimizeMaglevOnNextCall(foo);
print(foo(1));