Commit 4c6267e0 authored by Liu Yu, committed by V8 LUCI CQ

[loong64][mips][osr] Use the new OSR cache

Port commit 91453880

Bug: v8:12161
Change-Id: If2a465a80387d08df885e72e07b16962b6bc5d8b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3609227
Auto-Submit: Yu Liu <liuyu@loongson.cn>
Reviewed-by: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Commit-Queue: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Cr-Commit-Position: refs/heads/main@{#80216}
parent dccd1ed2
src/baseline/loong64/baseline-assembler-loong64-inl.h
@@ -381,6 +381,30 @@ void BaselineAssembler::StoreTaggedFieldNoWriteBarrier(Register target,
Register value) {
__ St_d(value, FieldMemOperand(target, offset));
}
+void BaselineAssembler::TryLoadOptimizedOsrCode(Register scratch_and_result,
+Register feedback_vector,
+FeedbackSlot slot,
+Label* on_result,
+Label::Distance) {
+Label fallthrough;
+LoadTaggedPointerField(scratch_and_result, feedback_vector,
+FeedbackVector::OffsetOfElementAt(slot.ToInt()));
+__ LoadWeakValue(scratch_and_result, scratch_and_result, &fallthrough);
+// Is it marked_for_deoptimization? If yes, clear the slot.
+{
+ScratchRegisterScope temps(this);
+Register scratch = temps.AcquireScratch();
+__ TestCodeTIsMarkedForDeoptimizationAndJump(scratch_and_result, scratch,
+eq, on_result);
+__ li(scratch, __ ClearedValue());
+StoreTaggedFieldNoWriteBarrier(
+feedback_vector, FeedbackVector::OffsetOfElementAt(slot.ToInt()),
+scratch);
+}
+__ bind(&fallthrough);
+Move(scratch_and_result, 0);
+}
void BaselineAssembler::AddToInterruptBudgetAndJumpIfNotExceeded(
int32_t weight, Label* skip_interrupt_label) {
ASM_CODE_COMMENT(masm_);
......
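For reference, the control flow this new assembler sequence emits can be modeled in plain C++ as below. The struct and field names are illustrative stand-ins, not V8 API; only the branching mirrors the emitted code.

  // Stand-ins for V8's CodeT and the feedback vector's OSR cache slot.
  struct CodeStub {
    bool marked_for_deoptimization;
  };
  struct OsrSlot {
    CodeStub* weak_code;  // nullptr models a cleared weak reference
  };

  // Mirrors TryLoadOptimizedOsrCode: load the weak slot; if it is cleared,
  // fall through with an empty result; if the cached code is marked for
  // deoptimization, clear the slot (the ClearedValue() store) and fall
  // through; otherwise hand the code back (the branch to on_result).
  CodeStub* TryLoadOptimizedOsrCode(OsrSlot& slot) {
    CodeStub* code = slot.weak_code;
    if (code == nullptr) return nullptr;  // LoadWeakValue fell through
    if (!code->marked_for_deoptimization) return code;  // on_result
    slot.weak_code = nullptr;  // store ClearedValue() into the slot
    return nullptr;            // bind(&fallthrough); result is zeroed
  }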
src/baseline/mips/baseline-assembler-mips-inl.h
@@ -393,6 +393,30 @@ void BaselineAssembler::StoreTaggedFieldNoWriteBarrier(Register target,
__ Sw(value, FieldMemOperand(target, offset));
}
+void BaselineAssembler::TryLoadOptimizedOsrCode(Register scratch_and_result,
+Register feedback_vector,
+FeedbackSlot slot,
+Label* on_result,
+Label::Distance) {
+Label fallthrough;
+LoadTaggedPointerField(scratch_and_result, feedback_vector,
+FeedbackVector::OffsetOfElementAt(slot.ToInt()));
+__ LoadWeakValue(scratch_and_result, scratch_and_result, &fallthrough);
+// Is it marked_for_deoptimization? If yes, clear the slot.
+{
+ScratchRegisterScope temps(this);
+Register scratch = temps.AcquireScratch();
+__ TestCodeTIsMarkedForDeoptimizationAndJump(scratch_and_result, scratch,
+eq, on_result);
+__ li(scratch, __ ClearedValue());
+StoreTaggedFieldNoWriteBarrier(
+feedback_vector, FeedbackVector::OffsetOfElementAt(slot.ToInt()),
+scratch);
+}
+__ bind(&fallthrough);
+Move(scratch_and_result, 0);
+}
void BaselineAssembler::AddToInterruptBudgetAndJumpIfNotExceeded(
int32_t weight, Label* skip_interrupt_label) {
ASM_CODE_COMMENT(masm_);
......
src/baseline/mips64/baseline-assembler-mips64-inl.h
@@ -391,6 +391,30 @@ void BaselineAssembler::StoreTaggedFieldNoWriteBarrier(Register target,
__ Sd(value, FieldMemOperand(target, offset));
}
+void BaselineAssembler::TryLoadOptimizedOsrCode(Register scratch_and_result,
+Register feedback_vector,
+FeedbackSlot slot,
+Label* on_result,
+Label::Distance) {
+Label fallthrough;
+LoadTaggedPointerField(scratch_and_result, feedback_vector,
+FeedbackVector::OffsetOfElementAt(slot.ToInt()));
+__ LoadWeakValue(scratch_and_result, scratch_and_result, &fallthrough);
+// Is it marked_for_deoptimization? If yes, clear the slot.
+{
+ScratchRegisterScope temps(this);
+Register scratch = temps.AcquireScratch();
+__ TestCodeTIsMarkedForDeoptimizationAndJump(scratch_and_result, scratch,
+eq, on_result);
+__ li(scratch, __ ClearedValue());
+StoreTaggedFieldNoWriteBarrier(
+feedback_vector, FeedbackVector::OffsetOfElementAt(slot.ToInt()),
+scratch);
+}
+__ bind(&fallthrough);
+Move(scratch_and_result, 0);
+}
void BaselineAssembler::AddToInterruptBudgetAndJumpIfNotExceeded(
int32_t weight, Label* skip_interrupt_label) {
ASM_CODE_COMMENT(masm_);
......
src/builtins/loong64/builtins-loong64.cc
@@ -878,11 +878,8 @@ static void TailCallOptimizedCodeSlot(MacroAssembler* masm,
// Check if the optimized code is marked for deopt. If it is, call the
// runtime to clear it.
-__ Ld_d(a6, FieldMemOperand(optimized_code_entry,
-Code::kCodeDataContainerOffset));
-__ Ld_w(a6, FieldMemOperand(a6, CodeDataContainer::kKindSpecificFlagsOffset));
-__ And(a6, a6, Operand(1 << Code::kMarkedForDeoptimizationBit));
-__ Branch(&heal_optimized_code_slot, ne, a6, Operand(zero_reg));
+__ TestCodeTIsMarkedForDeoptimizationAndJump(optimized_code_entry, a6, ne,
+&heal_optimized_code_slot);
// Optimized code is good, get it into the closure and link the closure into
// the optimized functions list, then tail call the optimized code.
@@ -1046,16 +1043,22 @@ static void MaybeOptimizeCodeOrTailCallOptimizedCodeSlot(
}
namespace {
-void ResetBytecodeAgeAndOsrState(MacroAssembler* masm,
-Register bytecode_array) {
-// Reset code age and the OSR state (optimized to a single write).
-static_assert(BytecodeArray::kOsrStateAndBytecodeAgeAreContiguous32Bits);
+void ResetBytecodeAge(MacroAssembler* masm, Register bytecode_array) {
+STATIC_ASSERT(BytecodeArray::kNoAgeBytecodeAge == 0);
-__ St_w(zero_reg,
-FieldMemOperand(bytecode_array,
-BytecodeArray::kOsrUrgencyAndInstallTargetOffset));
+__ St_h(zero_reg,
+FieldMemOperand(bytecode_array, BytecodeArray::kBytecodeAgeOffset));
}
+void ResetFeedbackVectorOsrUrgency(MacroAssembler* masm,
+Register feedback_vector, Register scratch) {
+DCHECK(!AreAliased(feedback_vector, scratch));
+__ Ld_bu(scratch,
+FieldMemOperand(feedback_vector, FeedbackVector::kOsrStateOffset));
+__ And(scratch, scratch,
+Operand(FeedbackVector::MaybeHasOptimizedOsrCodeBit::kMask));
+__ St_b(scratch,
+FieldMemOperand(feedback_vector, FeedbackVector::kOsrStateOffset));
+}
} // namespace
// static
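In plain C++, the two helpers above compute roughly the following; the bit layout constants are assumptions made for this sketch (the real ones come from FeedbackVector's OSR-state bitfields and BytecodeArray's age field):

  #include <cstdint>

  // Assumed osr_state byte layout: low bits hold the OSR urgency counter,
  // one higher bit records that the vector may cache optimized OSR code.
  constexpr uint8_t kMaybeHasOptimizedOsrCodeMask = 1u << 3;  // assumption

  // ResetFeedbackVectorOsrUrgency: zero the urgency bits but preserve the
  // maybe-has-optimized-OSR-code bit (the Ld_bu/And/St_b sequence above).
  uint8_t ResetOsrUrgency(uint8_t osr_state) {
    return osr_state & kMaybeHasOptimizedOsrCodeMask;
  }

  // ResetBytecodeAge: since kNoAgeBytecodeAge == 0, a single half-word
  // store of zero suffices to mark the bytecode as freshly executed.
  void ResetBytecodeAge(uint16_t* bytecode_age_field) {
    *bytecode_age_field = 0;
  }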
@@ -1092,6 +1095,10 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
feedback_vector,
&has_optimized_code_or_state);
}
+{
+UseScratchRegisterScope temps(masm);
+ResetFeedbackVectorOsrUrgency(masm, feedback_vector, temps.Acquire());
+}
// Increment invocation count for the function.
{
UseScratchRegisterScope temps(masm);
@@ -1125,7 +1132,7 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
// the frame, so load it into a register.
Register bytecode_array = descriptor.GetRegisterParameter(
BaselineOutOfLinePrologueDescriptor::kInterpreterBytecodeArray);
-ResetBytecodeAgeAndOsrState(masm, bytecode_array);
+ResetBytecodeAge(masm, bytecode_array);
__ Push(argc, bytecode_array);
// Baseline code frames store the feedback vector where interpreter would
@@ -1254,6 +1261,11 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
LoadTieringStateAndJumpIfNeedsProcessing(
masm, optimization_state, feedback_vector, &has_optimized_code_or_state);
+{
+UseScratchRegisterScope temps(masm);
+ResetFeedbackVectorOsrUrgency(masm, feedback_vector, temps.Acquire());
+}
Label not_optimized;
__ bind(&not_optimized);
@@ -1271,7 +1283,7 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
FrameScope frame_scope(masm, StackFrame::MANUAL);
__ PushStandardFrame(closure);
-ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister);
+ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister);
// Load initial bytecode offset.
__ li(kInterpreterBytecodeOffsetRegister,
@@ -1797,44 +1809,25 @@ enum class OsrSourceTier {
};
void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
-Register current_loop_depth,
-Register encoded_current_bytecode_offset,
-Register osr_urgency_and_install_target) {
-static constexpr Register scratch = a3;
-DCHECK(!AreAliased(scratch, current_loop_depth,
-encoded_current_bytecode_offset,
-osr_urgency_and_install_target));
-// OSR based on urgency, i.e. is the OSR urgency greater than the current
-// loop depth?
-Label try_osr;
-STATIC_ASSERT(BytecodeArray::OsrUrgencyBits::kShift == 0);
-Register urgency = scratch;
-__ And(urgency, osr_urgency_and_install_target,
-BytecodeArray::OsrUrgencyBits::kMask);
-__ Branch(&try_osr, hi, urgency, Operand(current_loop_depth));
-// OSR based on the install target offset, i.e. does the current bytecode
-// offset match the install target offset?
-static constexpr int kMask = BytecodeArray::OsrInstallTargetBits::kMask;
-Register install_target = osr_urgency_and_install_target;
-__ And(install_target, osr_urgency_and_install_target, Operand(kMask));
-__ Branch(&try_osr, eq, install_target,
-Operand(encoded_current_bytecode_offset));
-// Neither urgency nor the install target triggered, return to the caller.
-// Note: the return value must be nullptr or a valid Code object.
-__ Move(a0, zero_reg);
-__ Ret();
-__ bind(&try_osr);
+Register maybe_target_code) {
+Label jump_to_optimized_code;
+{
+// If maybe_target_code is not null, no need to call into runtime. A
+// precondition here is: if maybe_target_code is a Code object, it must NOT
+// be marked_for_deoptimization (callers must ensure this).
+__ Branch(&jump_to_optimized_code, ne, maybe_target_code,
+Operand(Smi::zero()));
+}
ASM_CODE_COMMENT(masm);
{
FrameScope scope(masm, StackFrame::INTERNAL);
__ CallRuntime(Runtime::kCompileOptimizedOSR);
}
// If the code object is null, just return to the caller.
-__ Ret(eq, a0, Operand(Smi::zero()));
+__ Ret(eq, maybe_target_code, Operand(Smi::zero()));
+__ bind(&jump_to_optimized_code);
if (source == OsrSourceTier::kInterpreter) {
// Drop the handler frame that is sitting on top of the actual
@@ -1844,8 +1837,9 @@ void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
// Load deoptimization data from the code object.
// <deopt_data> = <code>[#deoptimization_data_offset]
-__ Ld_d(a1, MemOperand(a0, Code::kDeoptimizationDataOrInterpreterDataOffset -
-kHeapObjectTag));
+__ Ld_d(a1, MemOperand(maybe_target_code,
+Code::kDeoptimizationDataOrInterpreterDataOffset -
+kHeapObjectTag));
// Load the OSR entrypoint offset from the deoptimization data.
// <osr_offset> = <deopt_data>[#header_size + #osr_pc_offset]
@@ -1855,30 +1849,27 @@ void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
// Compute the target address = code_obj + header_size + osr_offset
// <entry_addr> = <code_obj> + #header_size + <osr_offset>
-__ Add_d(a0, a0, a1);
-Generate_OSREntry(masm, a0, Operand(Code::kHeaderSize - kHeapObjectTag));
+__ Add_d(maybe_target_code, maybe_target_code, a1);
+Generate_OSREntry(masm, maybe_target_code,
+Operand(Code::kHeaderSize - kHeapObjectTag));
}
} // namespace
void Builtins::Generate_InterpreterOnStackReplacement(MacroAssembler* masm) {
using D = InterpreterOnStackReplacementDescriptor;
-STATIC_ASSERT(D::kParameterCount == 3);
+STATIC_ASSERT(D::kParameterCount == 1);
OnStackReplacement(masm, OsrSourceTier::kInterpreter,
-D::CurrentLoopDepthRegister(),
-D::EncodedCurrentBytecodeOffsetRegister(),
-D::OsrUrgencyAndInstallTargetRegister());
+D::MaybeTargetCodeRegister());
}
void Builtins::Generate_BaselineOnStackReplacement(MacroAssembler* masm) {
using D = BaselineOnStackReplacementDescriptor;
-STATIC_ASSERT(D::kParameterCount == 3);
+STATIC_ASSERT(D::kParameterCount == 1);
__ Ld_d(kContextRegister,
-MemOperand(fp, StandardFrameConstants::kContextOffset));
+MemOperand(fp, BaselineFrameConstants::kContextOffset));
OnStackReplacement(masm, OsrSourceTier::kBaseline,
-D::CurrentLoopDepthRegister(),
-D::EncodedCurrentBytecodeOffsetRegister(),
-D::OsrUrgencyAndInstallTargetRegister());
+D::MaybeTargetCodeRegister());
}
// static
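The reworked OnStackReplacement contract (identical in spirit across all three ports in this commit) can be summarized by this hypothetical C++ sketch; CompileOptimizedOSR stands in for the Runtime::kCompileOptimizedOSR call, and nullptr plays the role of Smi::zero():

  struct Code;  // opaque stand-in for a V8 Code object
  Code* CompileOptimizedOSR() { return nullptr; }  // stubbed runtime call

  Code* OnStackReplacement(Code* maybe_target_code) {
    // Fast path: the caller already found cached OSR code, so skip the
    // runtime. Precondition from the comment in the diff: non-null code is
    // never marked_for_deoptimization (TryLoadOptimizedOsrCode ensures it).
    if (maybe_target_code == nullptr) {
      maybe_target_code = CompileOptimizedOSR();         // slow path
      if (maybe_target_code == nullptr) return nullptr;  // __ Ret(eq, ...)
    }
    // jump_to_optimized_code: read the OSR entry offset from the code's
    // deoptimization data and tail-jump into it (Generate_OSREntry).
    return maybe_target_code;
  }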
@@ -3714,12 +3705,10 @@ void Generate_BaselineOrInterpreterEntry(MacroAssembler* masm,
__ Pop(kInterpreterAccumulatorRegister);
if (is_osr) {
-// TODO(pthier): Separate baseline Sparkplug from TF arming and don't disarm
-// Sparkplug here.
+// TODO(liuyu): Remove Ld as arm64 after register reallocation.
__ Ld_d(kInterpreterBytecodeArrayRegister,
MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp));
-ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister);
+ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister);
Generate_OSREntry(masm, code_obj,
Operand(Code::kHeaderSize - kHeapObjectTag));
} else {
......
src/builtins/mips/builtins-mips.cc
@@ -875,12 +875,8 @@ static void TailCallOptimizedCodeSlot(MacroAssembler* masm,
// Check if the optimized code is marked for deopt. If it is, call the
// runtime to clear it.
-__ Lw(scratch1,
-FieldMemOperand(optimized_code_entry, Code::kCodeDataContainerOffset));
-__ Lw(scratch1,
-FieldMemOperand(scratch1, CodeDataContainer::kKindSpecificFlagsOffset));
-__ And(scratch1, scratch1, Operand(1 << Code::kMarkedForDeoptimizationBit));
-__ Branch(&heal_optimized_code_slot, ne, scratch1, Operand(zero_reg));
+__ TestCodeTIsMarkedForDeoptimizationAndJump(optimized_code_entry, scratch1,
+ne, &heal_optimized_code_slot);
// Optimized code is good, get it into the closure and link the closure into
// the optimized functions list, then tail call the optimized code.
@@ -1044,14 +1040,22 @@ static void MaybeOptimizeCodeOrTailCallOptimizedCodeSlot(
}
namespace {
-void ResetBytecodeAgeAndOsrState(MacroAssembler* masm,
+void ResetBytecodeAge(MacroAssembler* masm,
Register bytecode_array) {
-// Reset code age and the OSR state (optimized to a single write).
-static_assert(BytecodeArray::kOsrStateAndBytecodeAgeAreContiguous32Bits);
+STATIC_ASSERT(BytecodeArray::kNoAgeBytecodeAge == 0);
-__ sw(zero_reg,
-FieldMemOperand(bytecode_array,
-BytecodeArray::kOsrUrgencyAndInstallTargetOffset));
+__ sh(zero_reg,
+FieldMemOperand(bytecode_array, BytecodeArray::kBytecodeAgeOffset));
}
+void ResetFeedbackVectorOsrUrgency(MacroAssembler* masm,
+Register feedback_vector, Register scratch) {
+DCHECK(!AreAliased(feedback_vector, scratch));
+__ lbu(scratch,
+FieldMemOperand(feedback_vector, FeedbackVector::kOsrStateOffset));
+__ And(scratch, scratch,
+Operand(FeedbackVector::MaybeHasOptimizedOsrCodeBit::kMask));
+__ sb(scratch,
+FieldMemOperand(feedback_vector, FeedbackVector::kOsrStateOffset));
+}
} // namespace
@@ -1088,6 +1092,10 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
feedback_vector,
&has_optimized_code_or_state);
}
+{
+UseScratchRegisterScope temps(masm);
+ResetFeedbackVectorOsrUrgency(masm, feedback_vector, temps.Acquire());
+}
// Increment invocation count for the function.
{
UseScratchRegisterScope temps(masm);
@@ -1121,7 +1129,7 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
// the frame, so load it into a register.
Register bytecode_array = descriptor.GetRegisterParameter(
BaselineOutOfLinePrologueDescriptor::kInterpreterBytecodeArray);
-ResetBytecodeAgeAndOsrState(masm, bytecode_array);
+ResetBytecodeAge(masm, bytecode_array);
__ Push(argc, bytecode_array);
// Baseline code frames store the feedback vector where interpreter would
@@ -1247,6 +1255,11 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
LoadTieringStateAndJumpIfNeedsProcessing(
masm, optimization_state, feedback_vector, &has_optimized_code_or_state);
+{
+UseScratchRegisterScope temps(masm);
+ResetFeedbackVectorOsrUrgency(masm, feedback_vector, temps.Acquire());
+}
Label not_optimized;
__ bind(&not_optimized);
@@ -1264,7 +1277,7 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
FrameScope frame_scope(masm, StackFrame::MANUAL);
__ PushStandardFrame(closure);
-ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister);
+ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister);
// Load initial bytecode offset.
__ li(kInterpreterBytecodeOffsetRegister,
@@ -1788,44 +1801,26 @@ enum class OsrSourceTier {
};
void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
-Register current_loop_depth,
-Register encoded_current_bytecode_offset,
-Register osr_urgency_and_install_target) {
-static constexpr Register scratch = a3;
-DCHECK(!AreAliased(scratch, current_loop_depth,
-encoded_current_bytecode_offset,
-osr_urgency_and_install_target));
-// OSR based on urgency, i.e. is the OSR urgency greater than the current
-// loop depth?
-Label try_osr;
-STATIC_ASSERT(BytecodeArray::OsrUrgencyBits::kShift == 0);
-Register urgency = scratch;
-__ And(urgency, osr_urgency_and_install_target,
-BytecodeArray::OsrUrgencyBits::kMask);
-__ Branch(&try_osr, hi, urgency, Operand(current_loop_depth));
-// OSR based on the install target offset, i.e. does the current bytecode
-// offset match the install target offset?
-static constexpr int kMask = BytecodeArray::OsrInstallTargetBits::kMask;
-Register install_target = osr_urgency_and_install_target;
-__ And(install_target, osr_urgency_and_install_target, Operand(kMask));
-__ Branch(&try_osr, eq, install_target,
-Operand(encoded_current_bytecode_offset));
-// Neither urgency nor the install target triggered, return to the caller.
-// Note: the return value must be nullptr or a valid Code object.
-__ Move(v0, zero_reg);
-__ Ret();
-__ bind(&try_osr);
+Register maybe_target_code) {
+Label jump_to_optimized_code;
+{
+// If maybe_target_code is not null, no need to call into runtime. A
+// precondition here is: if maybe_target_code is a Code object, it must NOT
+// be marked_for_deoptimization (callers must ensure this).
+__ Branch(&jump_to_optimized_code, ne, maybe_target_code,
+Operand(Smi::zero()));
+}
ASM_CODE_COMMENT(masm);
{
FrameScope scope(masm, StackFrame::INTERNAL);
__ CallRuntime(Runtime::kCompileOptimizedOSR);
+__ mov(maybe_target_code, v0);
}
// If the code object is null, just return to the caller.
-__ Ret(eq, v0, Operand(Smi::zero()));
+__ Ret(eq, maybe_target_code, Operand(Smi::zero()));
+__ bind(&jump_to_optimized_code);
if (source == OsrSourceTier::kInterpreter) {
// Drop the handler frame that is sitting on top of the actual
@@ -1834,8 +1829,9 @@ void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
}
// Load deoptimization data from the code object.
// <deopt_data> = <code>[#deoptimization_data_offset]
-__ lw(a1, MemOperand(v0, Code::kDeoptimizationDataOrInterpreterDataOffset -
-kHeapObjectTag));
+__ lw(a1, MemOperand(maybe_target_code,
+Code::kDeoptimizationDataOrInterpreterDataOffset -
+kHeapObjectTag));
// Load the OSR entrypoint offset from the deoptimization data.
// <osr_offset> = <deopt_data>[#header_size + #osr_pc_offset]
@@ -1846,30 +1842,27 @@ void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
// Compute the target address = code_obj + header_size + osr_offset
// <entry_addr> = <code_obj> + #header_size + <osr_offset>
-__ Addu(v0, v0, a1);
-Generate_OSREntry(masm, v0, Operand(Code::kHeaderSize - kHeapObjectTag));
+__ Addu(maybe_target_code, maybe_target_code, a1);
+Generate_OSREntry(masm, maybe_target_code,
+Operand(Code::kHeaderSize - kHeapObjectTag));
}
} // namespace
void Builtins::Generate_InterpreterOnStackReplacement(MacroAssembler* masm) {
using D = InterpreterOnStackReplacementDescriptor;
-STATIC_ASSERT(D::kParameterCount == 3);
+STATIC_ASSERT(D::kParameterCount == 1);
OnStackReplacement(masm, OsrSourceTier::kInterpreter,
-D::CurrentLoopDepthRegister(),
-D::EncodedCurrentBytecodeOffsetRegister(),
-D::OsrUrgencyAndInstallTargetRegister());
+D::MaybeTargetCodeRegister());
}
void Builtins::Generate_BaselineOnStackReplacement(MacroAssembler* masm) {
using D = BaselineOnStackReplacementDescriptor;
-STATIC_ASSERT(D::kParameterCount == 3);
+STATIC_ASSERT(D::kParameterCount == 1);
__ Lw(kContextRegister,
-MemOperand(fp, StandardFrameConstants::kContextOffset));
+MemOperand(fp, BaselineFrameConstants::kContextOffset));
OnStackReplacement(masm, OsrSourceTier::kBaseline,
-D::CurrentLoopDepthRegister(),
-D::EncodedCurrentBytecodeOffsetRegister(),
-D::OsrUrgencyAndInstallTargetRegister());
+D::MaybeTargetCodeRegister());
}
// static
@@ -4161,12 +4154,10 @@ void Generate_BaselineOrInterpreterEntry(MacroAssembler* masm,
__ Pop(kInterpreterAccumulatorRegister);
if (is_osr) {
-// TODO(pthier): Separate baseline Sparkplug from TF arming and don't disarm
-// Sparkplug here.
+// TODO(liuyu): Remove Ld as arm64 after register reallocation.
__ Lw(kInterpreterBytecodeArrayRegister,
MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp));
-ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister);
+ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister);
Generate_OSREntry(masm, code_obj,
Operand(Code::kHeaderSize - kHeapObjectTag));
} else {
......
src/builtins/mips64/builtins-mips64.cc
@@ -884,12 +884,8 @@ static void TailCallOptimizedCodeSlot(MacroAssembler* masm,
// Check if the optimized code is marked for deopt. If it is, call the
// runtime to clear it.
-__ Ld(scratch1,
-FieldMemOperand(optimized_code_entry, Code::kCodeDataContainerOffset));
-__ Lw(scratch1,
-FieldMemOperand(scratch1, CodeDataContainer::kKindSpecificFlagsOffset));
-__ And(scratch1, scratch1, Operand(1 << Code::kMarkedForDeoptimizationBit));
-__ Branch(&heal_optimized_code_slot, ne, scratch1, Operand(zero_reg));
+__ TestCodeTIsMarkedForDeoptimizationAndJump(optimized_code_entry, scratch1,
+ne, &heal_optimized_code_slot);
// Optimized code is good, get it into the closure and link the closure into
// the optimized functions list, then tail call the optimized code.
@@ -1052,16 +1048,22 @@ static void MaybeOptimizeCodeOrTailCallOptimizedCodeSlot(
}
namespace {
-void ResetBytecodeAgeAndOsrState(MacroAssembler* masm,
-Register bytecode_array) {
-// Reset code age and the OSR state (optimized to a single write).
-static_assert(BytecodeArray::kOsrStateAndBytecodeAgeAreContiguous32Bits);
+void ResetBytecodeAge(MacroAssembler* masm, Register bytecode_array) {
+STATIC_ASSERT(BytecodeArray::kNoAgeBytecodeAge == 0);
-__ Sw(zero_reg,
-FieldMemOperand(bytecode_array,
-BytecodeArray::kOsrUrgencyAndInstallTargetOffset));
+__ Sh(zero_reg,
+FieldMemOperand(bytecode_array, BytecodeArray::kBytecodeAgeOffset));
}
+void ResetFeedbackVectorOsrUrgency(MacroAssembler* masm,
+Register feedback_vector, Register scratch) {
+DCHECK(!AreAliased(feedback_vector, scratch));
+__ Lbu(scratch,
+FieldMemOperand(feedback_vector, FeedbackVector::kOsrStateOffset));
+__ And(scratch, scratch,
+Operand(FeedbackVector::MaybeHasOptimizedOsrCodeBit::kMask));
+__ Sb(scratch,
+FieldMemOperand(feedback_vector, FeedbackVector::kOsrStateOffset));
+}
} // namespace
// static
@@ -1096,6 +1098,10 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
feedback_vector,
&has_optimized_code_or_state);
}
+{
+UseScratchRegisterScope temps(masm);
+ResetFeedbackVectorOsrUrgency(masm, feedback_vector, temps.Acquire());
+}
// Increment invocation count for the function.
{
UseScratchRegisterScope temps(masm);
@@ -1129,7 +1135,7 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
// the frame, so load it into a register.
Register bytecode_array = descriptor.GetRegisterParameter(
BaselineOutOfLinePrologueDescriptor::kInterpreterBytecodeArray);
-ResetBytecodeAgeAndOsrState(masm, bytecode_array);
+ResetBytecodeAge(masm, bytecode_array);
__ Push(argc, bytecode_array);
// Baseline code frames store the feedback vector where interpreter would
@@ -1255,6 +1261,11 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
LoadTieringStateAndJumpIfNeedsProcessing(
masm, optimization_state, feedback_vector, &has_optimized_code_or_state);
+{
+UseScratchRegisterScope temps(masm);
+ResetFeedbackVectorOsrUrgency(masm, feedback_vector, temps.Acquire());
+}
Label not_optimized;
__ bind(&not_optimized);
@@ -1272,7 +1283,7 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
FrameScope frame_scope(masm, StackFrame::MANUAL);
__ PushStandardFrame(closure);
-ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister);
+ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister);
// Load initial bytecode offset.
__ li(kInterpreterBytecodeOffsetRegister,
@@ -1793,44 +1804,26 @@ enum class OsrSourceTier {
};
void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
-Register current_loop_depth,
-Register encoded_current_bytecode_offset,
-Register osr_urgency_and_install_target) {
-static constexpr Register scratch = a3;
-DCHECK(!AreAliased(scratch, current_loop_depth,
-encoded_current_bytecode_offset,
-osr_urgency_and_install_target));
-// OSR based on urgency, i.e. is the OSR urgency greater than the current
-// loop depth?
-Label try_osr;
-STATIC_ASSERT(BytecodeArray::OsrUrgencyBits::kShift == 0);
-Register urgency = scratch;
-__ And(urgency, osr_urgency_and_install_target,
-BytecodeArray::OsrUrgencyBits::kMask);
-__ Branch(&try_osr, hi, urgency, Operand(current_loop_depth));
-// OSR based on the install target offset, i.e. does the current bytecode
-// offset match the install target offset?
-static constexpr int kMask = BytecodeArray::OsrInstallTargetBits::kMask;
-Register install_target = osr_urgency_and_install_target;
-__ And(install_target, osr_urgency_and_install_target, Operand(kMask));
-__ Branch(&try_osr, eq, install_target,
-Operand(encoded_current_bytecode_offset));
-// Neither urgency nor the install target triggered, return to the caller.
-// Note: the return value must be nullptr or a valid Code object.
-__ Move(v0, zero_reg);
-__ Ret();
-__ bind(&try_osr);
+Register maybe_target_code) {
+Label jump_to_optimized_code;
+{
+// If maybe_target_code is not null, no need to call into runtime. A
+// precondition here is: if maybe_target_code is a Code object, it must NOT
+// be marked_for_deoptimization (callers must ensure this).
+__ Branch(&jump_to_optimized_code, ne, maybe_target_code,
+Operand(Smi::zero()));
+}
ASM_CODE_COMMENT(masm);
{
FrameScope scope(masm, StackFrame::INTERNAL);
__ CallRuntime(Runtime::kCompileOptimizedOSR);
+__ mov(maybe_target_code, v0);
}
// If the code object is null, just return to the caller.
-__ Ret(eq, v0, Operand(Smi::zero()));
+__ Ret(eq, maybe_target_code, Operand(Smi::zero()));
+__ bind(&jump_to_optimized_code);
if (source == OsrSourceTier::kInterpreter) {
// Drop the handler frame that is sitting on top of the actual
// JavaScript frame. This is the case when OSR is triggered from bytecode.
@@ -1838,8 +1831,9 @@ void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
}
// Load deoptimization data from the code object.
// <deopt_data> = <code>[#deoptimization_data_offset]
-__ Ld(a1, MemOperand(v0, Code::kDeoptimizationDataOrInterpreterDataOffset -
-kHeapObjectTag));
+__ Ld(a1, MemOperand(maybe_target_code,
+Code::kDeoptimizationDataOrInterpreterDataOffset -
+kHeapObjectTag));
// Load the OSR entrypoint offset from the deoptimization data.
// <osr_offset> = <deopt_data>[#header_size + #osr_pc_offset]
@@ -1849,30 +1843,27 @@ void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
// Compute the target address = code_obj + header_size + osr_offset
// <entry_addr> = <code_obj> + #header_size + <osr_offset>
-__ Daddu(v0, v0, a1);
-Generate_OSREntry(masm, v0, Operand(Code::kHeaderSize - kHeapObjectTag));
+__ Daddu(maybe_target_code, maybe_target_code, a1);
+Generate_OSREntry(masm, maybe_target_code,
+Operand(Code::kHeaderSize - kHeapObjectTag));
}
} // namespace
void Builtins::Generate_InterpreterOnStackReplacement(MacroAssembler* masm) {
using D = InterpreterOnStackReplacementDescriptor;
-STATIC_ASSERT(D::kParameterCount == 3);
+STATIC_ASSERT(D::kParameterCount == 1);
OnStackReplacement(masm, OsrSourceTier::kInterpreter,
-D::CurrentLoopDepthRegister(),
-D::EncodedCurrentBytecodeOffsetRegister(),
-D::OsrUrgencyAndInstallTargetRegister());
+D::MaybeTargetCodeRegister());
}
void Builtins::Generate_BaselineOnStackReplacement(MacroAssembler* masm) {
using D = BaselineOnStackReplacementDescriptor;
-STATIC_ASSERT(D::kParameterCount == 3);
+STATIC_ASSERT(D::kParameterCount == 1);
__ Ld(kContextRegister,
-MemOperand(fp, StandardFrameConstants::kContextOffset));
+MemOperand(fp, BaselineFrameConstants::kContextOffset));
OnStackReplacement(masm, OsrSourceTier::kBaseline,
-D::CurrentLoopDepthRegister(),
-D::EncodedCurrentBytecodeOffsetRegister(),
-D::OsrUrgencyAndInstallTargetRegister());
+D::MaybeTargetCodeRegister());
}
// static
@@ -3739,12 +3730,10 @@ void Generate_BaselineOrInterpreterEntry(MacroAssembler* masm,
__ Pop(kInterpreterAccumulatorRegister);
if (is_osr) {
-// TODO(pthier): Separate baseline Sparkplug from TF arming and don't disarm
-// Sparkplug here.
+// TODO(liuyu): Remove Ld as arm64 after register reallocation.
__ Ld(kInterpreterBytecodeArrayRegister,
MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp));
-ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister);
+ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister);
Generate_OSREntry(masm, code_obj,
Operand(Code::kHeaderSize - kHeapObjectTag));
} else {
......
src/codegen/loong64/macro-assembler-loong64.cc
@@ -2977,6 +2977,22 @@ void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch1,
Branch(stack_overflow, le, scratch1, Operand(scratch2));
}
+void MacroAssembler::TestCodeTIsMarkedForDeoptimizationAndJump(Register codet,
+Register scratch,
+Condition cond,
+Label* target) {
+Ld_d(scratch, FieldMemOperand(codet, Code::kCodeDataContainerOffset));
+Ld_wu(scratch,
+FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
+And(scratch, scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
+Branch(target, cond, scratch, Operand(zero_reg));
+}
+Operand MacroAssembler::ClearedValue() const {
+return Operand(
+static_cast<int32_t>(HeapObjectReference::ClearedValue(isolate()).ptr()));
+}
void MacroAssembler::InvokePrologue(Register expected_parameter_count,
Register actual_parameter_count,
Label* done, InvokeType type) {
......
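A minimal C++ model of the two new helpers follows; the flag's bit position is an assumption made for this sketch, while the structure (code object -> CodeDataContainer -> KindSpecificFlags -> one bit) matches the loads above:

  #include <cstdint>

  constexpr uint32_t kMarkedForDeoptimizationBit = 0;  // assumed position

  // TestCodeTIsMarkedForDeoptimizationAndJump boils down to: follow the
  // code object to its CodeDataContainer, load KindSpecificFlags, and test
  // one bit; the caller's condition (eq/ne) decides which outcome branches.
  bool IsMarkedForDeoptimization(uint32_t kind_specific_flags) {
    return (kind_specific_flags & (1u << kMarkedForDeoptimizationBit)) != 0;
  }

  // ClearedValue materializes the heap constant that marks a cleared weak
  // slot, so callers can store it back into the feedback vector; modeled
  // here as an opaque sentinel value (illustrative only).
  constexpr uint32_t kClearedWeakSlotSentinel = 0;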
src/codegen/loong64/macro-assembler-loong64.h
@@ -811,6 +811,11 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
// less efficient form using xor instead of mov is emitted.
void Swap(Register reg1, Register reg2, Register scratch = no_reg);
+void TestCodeTIsMarkedForDeoptimizationAndJump(Register codet,
+Register scratch,
+Condition cond, Label* target);
+Operand ClearedValue() const;
void PushRoot(RootIndex index) {
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
......
src/codegen/mips/macro-assembler-mips.cc
@@ -4333,6 +4333,22 @@ void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch1,
Branch(stack_overflow, le, scratch1, Operand(scratch2));
}
+void MacroAssembler::TestCodeTIsMarkedForDeoptimizationAndJump(Register codet,
+Register scratch,
+Condition cond,
+Label* target) {
+Lw(scratch, FieldMemOperand(codet, Code::kCodeDataContainerOffset));
+Lw(scratch,
+FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
+And(scratch, scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
+Branch(target, cond, scratch, Operand(zero_reg));
+}
+Operand MacroAssembler::ClearedValue() const {
+return Operand(
+static_cast<int32_t>(HeapObjectReference::ClearedValue(isolate()).ptr()));
+}
void MacroAssembler::InvokePrologue(Register expected_parameter_count,
Register actual_parameter_count,
Label* done, InvokeType type) {
......
src/codegen/mips/macro-assembler-mips.h
@@ -931,6 +931,11 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
// less efficient form using xor instead of mov is emitted.
void Swap(Register reg1, Register reg2, Register scratch = no_reg);
+void TestCodeTIsMarkedForDeoptimizationAndJump(Register codet,
+Register scratch,
+Condition cond, Label* target);
+Operand ClearedValue() const;
void PushRoot(RootIndex index) {
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
......
src/codegen/mips64/macro-assembler-mips64.cc
@@ -4860,6 +4860,22 @@ void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch1,
Branch(stack_overflow, le, scratch1, Operand(scratch2));
}
+void MacroAssembler::TestCodeTIsMarkedForDeoptimizationAndJump(Register codet,
+Register scratch,
+Condition cond,
+Label* target) {
+Ld(scratch, FieldMemOperand(codet, Code::kCodeDataContainerOffset));
+Lwu(scratch,
+FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
+And(scratch, scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
+Branch(target, cond, scratch, Operand(zero_reg));
+}
+Operand MacroAssembler::ClearedValue() const {
+return Operand(
+static_cast<int32_t>(HeapObjectReference::ClearedValue(isolate()).ptr()));
+}
void MacroAssembler::InvokePrologue(Register expected_parameter_count,
Register actual_parameter_count,
Label* done, InvokeType type) {
......
src/codegen/mips64/macro-assembler-mips64.h
@@ -983,6 +983,11 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
// less efficient form using xor instead of mov is emitted.
void Swap(Register reg1, Register reg2, Register scratch = no_reg);
+void TestCodeTIsMarkedForDeoptimizationAndJump(Register codet,
+Register scratch,
+Condition cond, Label* target);
+Operand ClearedValue() const;
void PushRoot(RootIndex index) {
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
......