Commit 8fc7735b authored by Jakob Linke, committed by V8 LUCI CQ

[maglev] Step 1 towards ML-TF tiering

Added IR members:

- For updating the interrupt budget: Jump, JumpLoop, Return and
  JumpFromInlined apply a delta to the interrupt budget.
- For OSR: JumpLoop needs its loop depth and the feedback slot (where
  cached OSR code lives).

This CL also adds code to maintain the interrupt budget in ML code.

Future tasks are documented in TODOs.

Bug: v8:7700
Change-Id: I240dc3ea76a1e60fda45e1d39b0b5f57dd9c566b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3610423
Auto-Submit: Jakob Linke <jgruber@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80393}
parent cb222018
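
The mechanism in brief: each JSFunction's FeedbackCell holds an interrupt_budget counter. The new IR members let Maglev-generated code apply signed deltas to that counter on control-flow edges, and when the budget drops below zero a runtime call gives the tiering machinery a chance to profile the function, request optimized code, or OSR. A minimal C++ sketch of this model, under simplified assumptions (FeedbackCellModel, kInitialInterruptBudget and HandleBudgetInterrupt are illustrative stand-ins, not V8's API):

    #include <cstdint>

    // Illustrative model of the per-function FeedbackCell; not V8's layout.
    struct FeedbackCellModel {
      int32_t interrupt_budget;
    };

    constexpr int32_t kInitialInterruptBudget = 144 * 8;  // made-up refill value

    // Stand-in for Runtime::kBytecodeBudgetInterruptWithStackCheck: in V8 this
    // bumps profiler ticks and may trigger tier-up; here it only refills.
    void HandleBudgetInterrupt(FeedbackCellModel& cell) {
      cell.interrupt_budget = kInitialInterruptBudget;
    }

    // What the CL conceptually emits on Jump/JumpLoop/Return/JumpFromInlined:
    // apply the delta; if the budget goes negative, call into the runtime.
    void UpdateBudget(FeedbackCellModel& cell, int32_t delta) {
      cell.interrupt_budget += delta;
      if (cell.interrupt_budget < 0) HandleBudgetInterrupt(cell);
    }
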
@@ -395,6 +395,8 @@ void TieringManager::OnInterruptTick(Handle<JSFunction> function) {
   function_obj.feedback_vector().SaturatingIncrementProfilerTicks();
 
+  // TODO(v8:7700): When tiering up from ML we no longer have an
+  // UnoptimizedFrame. Add logic to handle this case as well.
   JavaScriptFrameIterator it(isolate_);
   UnoptimizedFrame* frame = UnoptimizedFrame::cast(it.frame());
   const CodeKind code_kind = function_obj.GetActiveTier().value();
@@ -65,6 +65,10 @@ class MaglevCodeGeneratingNodeProcessor {
     __ Push(kJSFunctionRegister);              // Callee's JS function.
     __ Push(kJavaScriptCallArgCountRegister);  // Actual argument count.
 
+    // TODO(v8:7700): Handle TieringState and cached optimized code. See also:
+    // LoadTieringStateAndJumpIfNeedsProcessing and
+    // MaybeOptimizeCodeOrTailCallOptimizedCodeSlot.
+
     // Extend rsp by the size of the frame.
     code_gen_state_->set_untagged_slots(graph->untagged_stack_slots());
     code_gen_state_->set_tagged_slots(graph->tagged_stack_slots());
@@ -804,8 +804,11 @@ void MaglevGraphBuilder::InlineCallFromRegisters(
   inner_graph_builder.ProcessMergePoint(
       inner_graph_builder.inline_exit_offset());
   inner_graph_builder.StartNewBlock(inner_graph_builder.inline_exit_offset());
-  BasicBlock* end_block =
-      inner_graph_builder.CreateBlock<JumpFromInlined>({}, &end_ref);
+  // See also: InterpreterAssembler::UpdateInterruptBudgetOnReturn.
+  const uint32_t relative_jump_bytecode_offset =
+      inner_graph_builder.iterator_.current_offset();
+  BasicBlock* end_block = inner_graph_builder.CreateBlock<JumpFromInlined>(
+      {}, &end_ref, relative_jump_bytecode_offset);
   inner_graph_builder.ResolveJumpsToBlockAtOffset(
       end_block, inner_graph_builder.inline_exit_offset());
@@ -1055,20 +1058,30 @@ MAGLEV_UNIMPLEMENTED_BYTECODE(CreateUnmappedArguments)
 MAGLEV_UNIMPLEMENTED_BYTECODE(CreateRestParameter)
 void MaglevGraphBuilder::VisitJumpLoop() {
+  const uint32_t relative_jump_bytecode_offset =
+      iterator_.GetUnsignedImmediateOperand(0);
+  const int32_t loop_offset = iterator_.GetImmediateOperand(1);
+  const FeedbackSlot feedback_slot = iterator_.GetSlotOperand(2);
   int target = iterator_.GetJumpTargetOffset();
   BasicBlock* block =
       target == iterator_.current_offset()
-          ? FinishBlock<JumpLoop>(next_offset(), {}, &jump_targets_[target])
-          : FinishBlock<JumpLoop>(next_offset(), {},
-                                  jump_targets_[target].block_ptr());
+          ? FinishBlock<JumpLoop>(next_offset(), {}, &jump_targets_[target],
+                                  relative_jump_bytecode_offset, loop_offset,
+                                  feedback_slot)
+          : FinishBlock<JumpLoop>(
+                next_offset(), {}, jump_targets_[target].block_ptr(),
+                relative_jump_bytecode_offset, loop_offset, feedback_slot);
 
   merge_states_[target]->MergeLoop(*compilation_unit_,
                                    current_interpreter_frame_, block, target);
   block->set_predecessor_id(0);
 }
 
 void MaglevGraphBuilder::VisitJump() {
+  const uint32_t relative_jump_bytecode_offset =
+      iterator_.GetUnsignedImmediateOperand(0);
   BasicBlock* block = FinishBlock<Jump>(
-      next_offset(), {}, &jump_targets_[iterator_.GetJumpTargetOffset()]);
+      next_offset(), {}, &jump_targets_[iterator_.GetJumpTargetOffset()],
+      relative_jump_bytecode_offset);
   MergeIntoFrameState(block, iterator_.GetJumpTargetOffset());
   DCHECK_LT(next_offset(), bytecode().length());
 }
@@ -1195,8 +1208,11 @@ MAGLEV_UNIMPLEMENTED_BYTECODE(SetPendingMessage)
 MAGLEV_UNIMPLEMENTED_BYTECODE(Throw)
 MAGLEV_UNIMPLEMENTED_BYTECODE(ReThrow)
 
 void MaglevGraphBuilder::VisitReturn() {
+  // See also: InterpreterAssembler::UpdateInterruptBudgetOnReturn.
+  const uint32_t relative_jump_bytecode_offset = iterator_.current_offset();
   if (!is_inline()) {
-    FinishBlock<Return>(next_offset(), {GetAccumulatorTagged()});
+    FinishBlock<Return>(next_offset(), {GetAccumulatorTagged()},
+                        relative_jump_bytecode_offset);
     return;
   }
@@ -1205,8 +1221,9 @@ void MaglevGraphBuilder::VisitReturn() {
   // execution of the caller.
   // TODO(leszeks): Consider shortcutting this Jump for cases where there is
   // only one return and no need to merge return states.
-  BasicBlock* block = FinishBlock<Jump>(next_offset(), {},
-                                        &jump_targets_[inline_exit_offset()]);
+  BasicBlock* block =
+      FinishBlock<Jump>(next_offset(), {}, &jump_targets_[inline_exit_offset()],
+                        relative_jump_bytecode_offset);
   MergeIntoInlinedReturnFrameState(block);
 }
 MAGLEV_UNIMPLEMENTED_BYTECODE(ThrowReferenceErrorIfHole)
@@ -1050,6 +1050,46 @@ void Construct::GenerateCode(MaglevCodeGenState* code_gen_state,
   code_gen_state->DefineLazyDeoptPoint(lazy_deopt_info());
 }
 
+namespace {
+
+void AttemptOnStackReplacement(MaglevCodeGenState* code_gen_state,
+                               int32_t loop_depth, FeedbackSlot feedback_slot) {
+  // TODO(v8:7700): Implement me. See also
+  // InterpreterAssembler::OnStackReplacement.
+}
+
+void UpdateInterruptBudgetAndMaybeCallRuntime(
+    MaglevCodeGenState* code_gen_state, Register scratch,
+    int32_t relative_jump_bytecode_offset) {
+  Label out;
+
+  // TODO(v8:7700): Remove once regalloc is fixed. See crrev.com/c/3625978.
+  __ Push(scratch);
+
+  __ movq(scratch, MemOperand(rbp, StandardFrameConstants::kFunctionOffset));
+  __ LoadTaggedPointerField(
+      scratch, FieldOperand(scratch, JSFunction::kFeedbackCellOffset));
+  __ addl(FieldOperand(scratch, FeedbackCell::kInterruptBudgetOffset),
+          Immediate(relative_jump_bytecode_offset));
+  __ j(greater_equal, &out);
+  __ Move(kContextRegister, code_gen_state->native_context().object());
+  __ Push(MemOperand(rbp, StandardFrameConstants::kFunctionOffset));
+  __ CallRuntime(Runtime::kBytecodeBudgetInterruptWithStackCheck, 1);
+  __ bind(&out);
+
+  // TODO(v8:7700): Remove once regalloc is fixed. See crrev.com/c/3625978.
+  __ Pop(scratch);
+}
+
+void UpdateInterruptBudgetAndMaybeCallRuntime(
+    MaglevCodeGenState* code_gen_state, Register scratch,
+    base::Optional<uint32_t> relative_jump_bytecode_offset) {
+  if (!relative_jump_bytecode_offset.has_value()) return;
+  UpdateInterruptBudgetAndMaybeCallRuntime(
+      code_gen_state, scratch, relative_jump_bytecode_offset.value());
+}
+
+}  // namespace
 
 // ---
 // Control nodes
 // ---
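
Reading the helper above: the generated x64 code loads the current JSFunction from its frame slot, follows it to the FeedbackCell, and applies the delta directly to the cell's interrupt_budget field. The addl sets the processor flags, so j(greater_equal, &out) skips the runtime call while the budget is still non-negative; only a negative result falls through to Runtime::kBytecodeBudgetInterruptWithStackCheck. The base::Optional overload means nodes constructed without an offset emit no budget code at all. Roughly equivalent semantics as a hedged C++ sketch (JSFunctionModel and FeedbackCellModel are illustrative types, not V8's object layout):

    #include <cstdint>

    struct FeedbackCellModel { int32_t interrupt_budget; };
    struct JSFunctionModel { FeedbackCellModel* feedback_cell; };

    // Stand-in for the CallRuntime slow path; intentionally trivial here.
    void BudgetInterruptRuntime(JSFunctionModel* function) {}

    void UpdateInterruptBudgetSemantics(JSFunctionModel* function,
                                        int32_t delta) {
      FeedbackCellModel* cell = function->feedback_cell;  // movq + LoadTaggedPointerField
      cell->interrupt_budget += delta;          // addl (sets the sign flag)
      if (cell->interrupt_budget >= 0) return;  // j(greater_equal, &out)
      BudgetInterruptRuntime(function);         // CallRuntime(...WithStackCheck)
    }
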
@@ -1061,6 +1101,13 @@ void Return::GenerateCode(MaglevCodeGenState* code_gen_state,
                           const ProcessingState& state) {
   DCHECK_EQ(ToRegister(value_input()), kReturnRegister0);
 
+  // We're not going to continue execution, so we can use an arbitrary register
+  // here instead of relying on temporaries from the register allocator.
+  Register scratch = r8;
+
+  UpdateInterruptBudgetAndMaybeCallRuntime(code_gen_state, scratch,
+                                           relative_jump_bytecode_offset_);
+
   // Read the formal number of parameters from the top level compilation unit
   // (i.e. the outermost, non inlined function).
   int formal_params_size = code_gen_state->compilation_info()
@@ -1069,7 +1116,7 @@ void Return::GenerateCode(MaglevCodeGenState* code_gen_state,
   // We're not going to continue execution, so we can use an arbitrary register
   // here instead of relying on temporaries from the register allocator.
-  Register actual_params_size = r8;
+  Register actual_params_size = scratch;
 
   // Compute the size of the actual parameters + receiver (in bytes).
   // TODO(leszeks): Consider making this an input into Return to re-use the
@@ -1105,9 +1152,14 @@ void Deopt::GenerateCode(MaglevCodeGenState* code_gen_state,
 }
 
 void Jump::AllocateVreg(MaglevVregAllocationState* vreg_state,
-                        const ProcessingState& state) {}
+                        const ProcessingState& state) {
+  set_temporaries_needed(1);
+}
 void Jump::GenerateCode(MaglevCodeGenState* code_gen_state,
                         const ProcessingState& state) {
+  UpdateInterruptBudgetAndMaybeCallRuntime(
+      code_gen_state, temporaries().PopFirst(), relative_jump_bytecode_offset_);
+
   // Avoid emitting a jump to the next block.
   if (target() != state.next_block()) {
     __ jmp(target()->label());
@@ -1129,9 +1181,14 @@ void JumpToInlined::PrintParams(std::ostream& os,
 }
 
 void JumpFromInlined::AllocateVreg(MaglevVregAllocationState* vreg_state,
-                                   const ProcessingState& state) {}
+                                   const ProcessingState& state) {
+  set_temporaries_needed(1);
+}
 void JumpFromInlined::GenerateCode(MaglevCodeGenState* code_gen_state,
                                    const ProcessingState& state) {
+  UpdateInterruptBudgetAndMaybeCallRuntime(
+      code_gen_state, temporaries().PopFirst(), relative_jump_bytecode_offset_);
+
   // Avoid emitting a jump to the next block.
   if (target() != state.next_block()) {
     __ jmp(target()->label());
@@ -1139,9 +1196,16 @@ void JumpFromInlined::GenerateCode(MaglevCodeGenState* code_gen_state,
 }
 
 void JumpLoop::AllocateVreg(MaglevVregAllocationState* vreg_state,
-                            const ProcessingState& state) {}
+                            const ProcessingState& state) {
+  set_temporaries_needed(1);
+}
 void JumpLoop::GenerateCode(MaglevCodeGenState* code_gen_state,
                             const ProcessingState& state) {
+  AttemptOnStackReplacement(code_gen_state, loop_depth_, feedback_slot_);
+  UpdateInterruptBudgetAndMaybeCallRuntime(code_gen_state,
+                                           temporaries().PopFirst(),
+                                           -relative_jump_bytecode_offset_);
+
   __ jmp(target()->label());
 }
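
Note the sign convention across the three generators above: Jump and JumpFromInlined pass relative_jump_bytecode_offset_ unchanged, while JumpLoop negates it, so only loop back edges drain the budget toward the runtime call. JumpLoop also tries OSR first via AttemptOnStackReplacement, which at this stage is still an empty stub (see the TODO referencing InterpreterAssembler::OnStackReplacement).
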
@@ -1854,27 +1854,50 @@ class Jump : public UnconditionalControlNodeT<Jump> {
   using Base = UnconditionalControlNodeT<Jump>;
 
  public:
-  explicit Jump(uint32_t bitfield, BasicBlockRef* target_refs)
-      : Base(bitfield, target_refs) {}
+  Jump(uint32_t bitfield, BasicBlockRef* target_refs,
+       base::Optional<uint32_t> relative_jump_bytecode_offset = {})
+      : Base(bitfield, target_refs),
+        relative_jump_bytecode_offset_(relative_jump_bytecode_offset) {}
 
   void AllocateVreg(MaglevVregAllocationState*, const ProcessingState&);
   void GenerateCode(MaglevCodeGenState*, const ProcessingState&);
   void PrintParams(std::ostream&, MaglevGraphLabeller*) const {}
+
+ private:
+  // For maintaining the interrupt_budget.
+  const base::Optional<uint32_t> relative_jump_bytecode_offset_;
 };
 
 class JumpLoop : public UnconditionalControlNodeT<JumpLoop> {
   using Base = UnconditionalControlNodeT<JumpLoop>;
 
  public:
-  explicit JumpLoop(uint32_t bitfield, BasicBlock* target)
-      : Base(bitfield, target) {}
-  explicit JumpLoop(uint32_t bitfield, BasicBlockRef* ref)
-      : Base(bitfield, ref) {}
+  explicit JumpLoop(uint32_t bitfield, BasicBlock* target,
+                    uint32_t relative_jump_bytecode_offset, int32_t loop_depth,
+                    FeedbackSlot feedback_slot)
+      : Base(bitfield, target),
+        relative_jump_bytecode_offset_(relative_jump_bytecode_offset),
+        loop_depth_(loop_depth),
+        feedback_slot_(feedback_slot) {}
+  explicit JumpLoop(uint32_t bitfield, BasicBlockRef* ref,
+                    uint32_t relative_jump_bytecode_offset, int32_t loop_depth,
+                    FeedbackSlot feedback_slot)
+      : Base(bitfield, ref),
+        relative_jump_bytecode_offset_(relative_jump_bytecode_offset),
+        loop_depth_(loop_depth),
+        feedback_slot_(feedback_slot) {}
 
   void AllocateVreg(MaglevVregAllocationState*, const ProcessingState&);
   void GenerateCode(MaglevCodeGenState*, const ProcessingState&);
   void PrintParams(std::ostream&, MaglevGraphLabeller*) const {}
+
+ private:
+  // For maintaining the interrupt_budget.
+  const uint32_t relative_jump_bytecode_offset_;
+  // For OSR.
+  const int32_t loop_depth_;
+  const FeedbackSlot feedback_slot_;
 };
 
 class JumpToInlined : public UnconditionalControlNodeT<JumpToInlined> {
@@ -1899,17 +1922,27 @@ class JumpFromInlined : public UnconditionalControlNodeT<JumpFromInlined> {
   using Base = UnconditionalControlNodeT<JumpFromInlined>;
 
  public:
-  explicit JumpFromInlined(uint32_t bitfield, BasicBlockRef* target_refs)
-      : Base(bitfield, target_refs) {}
+  explicit JumpFromInlined(
+      uint32_t bitfield, BasicBlockRef* target_refs,
+      base::Optional<uint32_t> relative_jump_bytecode_offset = {})
+      : Base(bitfield, target_refs),
+        relative_jump_bytecode_offset_(relative_jump_bytecode_offset) {}
 
   void AllocateVreg(MaglevVregAllocationState*, const ProcessingState&);
   void GenerateCode(MaglevCodeGenState*, const ProcessingState&);
   void PrintParams(std::ostream&, MaglevGraphLabeller*) const {}
+
+ private:
+  // For maintaining the interrupt_budget.
+  const base::Optional<uint32_t> relative_jump_bytecode_offset_;
 };
 
 class Return : public ControlNode {
  public:
-  explicit Return(uint32_t bitfield) : ControlNode(bitfield) {
+  explicit Return(uint32_t bitfield,
+                  base::Optional<uint32_t> relative_jump_bytecode_offset = {})
+      : ControlNode(bitfield),
+        relative_jump_bytecode_offset_(relative_jump_bytecode_offset) {
     DCHECK_EQ(NodeBase::opcode(), opcode_of<Return>);
   }
@@ -1918,6 +1951,10 @@ class Return : public ControlNode {
   void AllocateVreg(MaglevVregAllocationState*, const ProcessingState&);
   void GenerateCode(MaglevCodeGenState*, const ProcessingState&);
   void PrintParams(std::ostream&, MaglevGraphLabeller*) const {}
+
+ private:
+  // For maintaining the interrupt_budget.
+  const base::Optional<uint32_t> relative_jump_bytecode_offset_;
 };
 
 class Deopt : public ControlNode {