Commit a77dd89e authored by Thibaud Michaud, committed by V8 LUCI CQ

[regalloc] Improve gap resolver algorithm

design doc:
https://docs.google.com/document/d/1h4nxeEkMrVBd3QUmLBFGCqbq9BPUPFfkU6L0-IdvJdE/edit?usp=sharing

Bug: v8:5210, chromium:1269989, chromium:1313647
Change-Id: I31cc6c0756b3754b61c02f7c5ddde6b5016abcd0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3613322
Reviewed-by: Darius Mercadier <dmercadier@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Thibaud Michaud <thibaudm@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80816}
parent 31925e51
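
For readers without the design doc at hand: a parallel move can contain a cycle (e.g. {r0 -> r1, r1 -> r0}) in which every move's destination is another move's source, so no single move can safely go first. The reworked resolver breaks such a cycle by parking one operand in a temporary location, assembling the remaining moves, and finally moving the parked value to its destination. A minimal, platform-independent sketch on plain values follows; the Operand type and function are invented purely for illustration, the real code works on InstructionOperands and emits machine moves:

// Resolve the two-move cycle {a -> b, b -> a}.
struct Operand { int value; };

void ResolveTwoCycle(Operand& a, Operand& b) {
  Operand temp;
  temp.value = b.value;   // MoveToTempLocation: park one end of the cycle
  b.value = a.value;      // AssembleMove: the remaining move is now acyclic
  a.value = temp.value;   // MoveTempLocationTo: complete the cycle
}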
......@@ -1402,7 +1402,9 @@ class V8_EXPORT_PRIVATE V8_NODISCARD UseScratchRegisterScope {
bool CanAcquire() const {
return !assembler_->GetScratchRegisterList()->is_empty();
}
bool CanAcquireS() const { return CanAcquireVfp<SwVfpRegister>(); }
bool CanAcquireD() const { return CanAcquireVfp<DwVfpRegister>(); }
bool CanAcquireQ() const { return CanAcquireVfp<QwNeonRegister>(); }
void Include(const Register& reg1, const Register& reg2 = no_reg) {
RegList* available = assembler_->GetScratchRegisterList();
......@@ -1412,6 +1414,12 @@ class V8_EXPORT_PRIVATE V8_NODISCARD UseScratchRegisterScope {
available->set(reg1);
available->set(reg2);
}
void Include(VfpRegList list) {
VfpRegList* available = assembler_->GetScratchVfpRegisterList();
DCHECK_NOT_NULL(available);
DCHECK_EQ((*available & list), 0x0);
*available = *available | list;
}
void Exclude(const Register& reg1, const Register& reg2 = no_reg) {
RegList* available = assembler_->GetScratchRegisterList();
DCHECK_NOT_NULL(available);
......@@ -1419,6 +1427,12 @@ class V8_EXPORT_PRIVATE V8_NODISCARD UseScratchRegisterScope {
DCHECK_IMPLIES(reg2.is_valid(), available->has(reg2));
available->clear(RegList{reg1, reg2});
}
void Exclude(VfpRegList list) {
VfpRegList* available = assembler_->GetScratchVfpRegisterList();
DCHECK_NOT_NULL(available);
DCHECK_EQ((*available | list), *available);
*available = *available & ~list;
}
private:
friend class Assembler;
......
......@@ -883,6 +883,17 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
DISALLOW_IMPLICIT_CONSTRUCTORS(MacroAssembler);
};
struct MoveCycleState {
// List of scratch registers reserved for pending moves in a move cycle, and
// which should therefore not be used as a temporary location by
// {MoveToTempLocation}. The GP scratch register is implicitly reserved.
VfpRegList scratch_v_reglist = 0;
// Available scratch registers during the move cycle resolution scope.
base::Optional<UseScratchRegisterScope> temps;
// Code of the scratch register picked by {MoveToTempLocation}.
int scratch_reg_code = -1;
};
#define ACCESS_MASM(masm) masm->
} // namespace internal
......
......@@ -2165,10 +2165,14 @@ class V8_NODISCARD UseScratchRegisterScope {
return VRegister::Create(AcquireNextAvailable(availablefp_).code(), format);
}
bool CanAcquire() const { return !available_->IsEmpty(); }
bool CanAcquireFP() const { return !availablefp_->IsEmpty(); }
Register AcquireSameSizeAs(const Register& reg);
V8_EXPORT_PRIVATE VRegister AcquireSameSizeAs(const VRegister& reg);
void Include(const CPURegList& list) { available_->Combine(list); }
void IncludeFP(const CPURegList& list) { availablefp_->Combine(list); }
void Exclude(const CPURegList& list) {
#if DEBUG
CPURegList copy(list);
......@@ -2179,6 +2183,16 @@ class V8_NODISCARD UseScratchRegisterScope {
#endif
available_->Remove(list);
}
void ExcludeFP(const CPURegList& list) {
#if DEBUG
CPURegList copy(list);
while (!copy.IsEmpty()) {
const CPURegister& reg = copy.PopHighestIndex();
DCHECK(availablefp_->IncludesAliasOf(reg));
}
#endif
availablefp_->Remove(list);
}
void Include(const Register& reg1, const Register& reg2 = NoReg) {
CPURegList list(reg1, reg2);
Include(list);
......@@ -2187,6 +2201,7 @@ class V8_NODISCARD UseScratchRegisterScope {
CPURegList list(reg1, reg2);
Exclude(list);
}
void ExcludeFP(const VRegister& reg) { ExcludeFP(CPURegList(reg)); }
private:
V8_EXPORT_PRIVATE static CPURegister AcquireNextAvailable(
......@@ -2201,6 +2216,18 @@ class V8_NODISCARD UseScratchRegisterScope {
uint64_t old_availablefp_; // kVRegister
};
struct MoveCycleState {
// List of scratch registers reserved for pending moves in a move cycle, and
// which should therefore not be used as a temporary location by
// {MoveToTempLocation}.
RegList scratch_regs;
DoubleRegList scratch_fp_regs;
// Available scratch registers during the move cycle resolution scope.
base::Optional<UseScratchRegisterScope> temps;
// Scratch register picked by {MoveToTempLocation}.
base::Optional<CPURegister> scratch_reg;
};
} // namespace internal
} // namespace v8
......
......@@ -690,6 +690,11 @@ inline Operand FieldOperand(Register object, Register index, ScaleFactor scale,
return Operand(object, index, scale, offset - kHeapObjectTag);
}
struct MoveCycleState {
// Whether a move in the cycle needs the double scratch register.
bool pending_double_scratch_register_use = false;
};
#define ACCESS_MASM(masm) masm->
} // namespace internal
......
......@@ -961,6 +961,12 @@ inline Operand StackOperandForReturnAddress(int32_t disp) {
return Operand(rsp, disp);
}
struct MoveCycleState {
// Whether a move in the cycle needs the scratch or double scratch register.
bool pending_scratch_register_use = false;
bool pending_double_scratch_register_use = false;
};
#define ACCESS_MASM(masm) masm->
} // namespace internal
......
......@@ -3,10 +3,14 @@
// found in the LICENSE file.
#include "src/base/numbers/double.h"
#include "src/codegen/arm/assembler-arm.h"
#include "src/codegen/arm/constants-arm.h"
#include "src/codegen/arm/register-arm.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/common/globals.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/gap-resolver.h"
......@@ -4049,6 +4053,150 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
UNREACHABLE();
}
void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
// Must be kept in sync with {MoveTempLocationTo}.
auto rep = LocationOperand::cast(source)->representation();
move_cycle_.temps.emplace(tasm());
auto& temps = *move_cycle_.temps;
// Temporarily exclude the reserved scratch registers while we pick a
// location to resolve the cycle. Re-include them immediately afterwards so
// that they are available to assemble the move.
temps.Exclude(move_cycle_.scratch_v_reglist);
int reg_code = -1;
if ((!IsFloatingPoint(rep) || rep == MachineRepresentation::kFloat32) &&
temps.CanAcquireS()) {
reg_code = temps.AcquireS().code();
} else if (rep == MachineRepresentation::kFloat64 && temps.CanAcquireD()) {
reg_code = temps.AcquireD().code();
} else if (rep == MachineRepresentation::kSimd128 && temps.CanAcquireQ()) {
reg_code = temps.AcquireQ().code();
}
temps.Include(move_cycle_.scratch_v_reglist);
if (reg_code != -1) {
// A scratch register is available for this rep.
move_cycle_.scratch_reg_code = reg_code;
if (IsFloatingPoint(rep)) {
AllocatedOperand scratch(LocationOperand::REGISTER, rep, reg_code);
AssembleMove(source, &scratch);
} else {
AllocatedOperand scratch(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, reg_code);
ArmOperandConverter g(this, nullptr);
if (source->IsStackSlot()) {
__ vldr(g.ToFloatRegister(&scratch), g.ToMemOperand(source));
} else {
DCHECK(source->IsRegister());
__ vmov(g.ToFloatRegister(&scratch), g.ToRegister(source));
}
}
} else {
// The scratch registers are blocked by pending moves. Use the stack
// instead.
int new_slots = ElementSizeInPointers(rep);
ArmOperandConverter g(this, nullptr);
if (source->IsRegister()) {
__ push(g.ToRegister(source));
} else if (source->IsStackSlot()) {
UseScratchRegisterScope temps2(tasm());
Register scratch = temps2.Acquire();
__ ldr(scratch, g.ToMemOperand(source));
__ push(scratch);
} else {
// No push instruction for this operand type. Bump the stack pointer and
// assemble the move.
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
__ sub(sp, sp, Operand(new_slots * kSystemPointerSize));
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(source, &temp);
}
frame_access_state()->IncreaseSPDelta(new_slots);
}
}
void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
MachineRepresentation rep) {
int scratch_reg_code = move_cycle_.scratch_reg_code;
DCHECK(move_cycle_.temps.has_value());
if (scratch_reg_code != -1) {
if (IsFloatingPoint(rep)) {
AllocatedOperand scratch(LocationOperand::REGISTER, rep,
scratch_reg_code);
AssembleMove(&scratch, dest);
} else {
AllocatedOperand scratch(LocationOperand::REGISTER,
MachineRepresentation::kFloat32,
scratch_reg_code);
ArmOperandConverter g(this, nullptr);
if (dest->IsStackSlot()) {
__ vstr(g.ToFloatRegister(&scratch), g.ToMemOperand(dest));
} else {
DCHECK(dest->IsRegister());
__ vmov(g.ToRegister(dest), g.ToFloatRegister(&scratch));
}
}
} else {
int new_slots = ElementSizeInPointers(rep);
frame_access_state()->IncreaseSPDelta(-new_slots);
ArmOperandConverter g(this, nullptr);
if (dest->IsRegister()) {
__ pop(g.ToRegister(dest));
} else if (dest->IsStackSlot()) {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
__ pop(scratch);
__ str(scratch, g.ToMemOperand(dest));
} else {
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(&temp, dest);
__ add(sp, sp, Operand(new_slots * kSystemPointerSize));
}
}
// Restore the default state to release the {UseScratchRegisterScope} and to
// prepare for the next cycle.
move_cycle_ = MoveCycleState();
}
void CodeGenerator::SetPendingMove(MoveOperands* move) {
InstructionOperand& source = move->source();
InstructionOperand& destination = move->destination();
MoveType::Type move_type =
MoveType::InferMove(&move->source(), &move->destination());
UseScratchRegisterScope temps(tasm());
if (move_type == MoveType::kStackToStack) {
if (source.IsStackSlot() || source.IsFloatStackSlot()) {
SwVfpRegister temp = temps.AcquireS();
move_cycle_.scratch_v_reglist |= temp.ToVfpRegList();
} else if (source.IsDoubleStackSlot()) {
DwVfpRegister temp = temps.AcquireD();
move_cycle_.scratch_v_reglist |= temp.ToVfpRegList();
} else {
QwNeonRegister temp = temps.AcquireQ();
move_cycle_.scratch_v_reglist |= temp.ToVfpRegList();
}
return;
} else if (move_type == MoveType::kConstantToStack) {
if (destination.IsStackSlot()) {
// Acquire an S register instead of a general purpose register in case
// `vstr` needs one to compute the address of `dst`.
SwVfpRegister s_temp = temps.AcquireS();
move_cycle_.scratch_v_reglist |= s_temp.ToVfpRegList();
} else if (destination.IsFloatStackSlot()) {
SwVfpRegister temp = temps.AcquireS();
move_cycle_.scratch_v_reglist |= temp.ToVfpRegList();
} else {
DwVfpRegister temp = temps.AcquireD();
move_cycle_.scratch_v_reglist |= temp.ToVfpRegList();
}
}
}
void CodeGenerator::AssembleSwap(InstructionOperand* source,
InstructionOperand* destination) {
ArmOperandConverter g(this, nullptr);
......
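
The stack fallback above (and its equivalents on the other platforms below) allocates a fresh slot just below the current stack pointer; the slot id is derived from the frame size and the current sp delta. A small worked example with assumed numbers, purely for illustration:

// Suppose the frame has 10 slots in total, 2 extra slots have already been
// pushed in this block (sp_delta == 2), and the operand needs 1 slot:
int last_frame_slot_id = 10 - 1;  // GetTotalFrameSlotCount() - 1  -> 9
int sp_delta = 2;                 // frame_access_state_->sp_delta()
int new_slots = 1;                // ElementSizeInPointers(rep)
int temp_slot = last_frame_slot_id + sp_delta + new_slots;  // -> 12
// After sp is lowered by new_slots * kSystemPointerSize, slot id 12 names the
// freshly allocated word, so the value can be spilled there with a regular
// AssembleMove; MoveTempLocationTo later reloads it and rebalances the
// bookkeeping with IncreaseSPDelta(-new_slots).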
......@@ -4,6 +4,7 @@
#include "src/codegen/arm64/assembler-arm64-inl.h"
#include "src/codegen/arm64/macro-assembler-arm64-inl.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
......@@ -3342,6 +3343,165 @@ void CodeGenerator::PrepareForDeoptimizationExits(
}
}
void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
// Must be kept in sync with {MoveTempLocationTo}.
DCHECK(!source->IsImmediate());
auto rep = LocationOperand::cast(source)->representation();
move_cycle_.temps.emplace(tasm());
auto& temps = *move_cycle_.temps;
// Temporarily exclude the reserved scratch registers while we pick one to
// resolve the move cycle. Re-include them immediately afterwards as they
// might be needed for the move to the temp location.
temps.Exclude(CPURegList(64, move_cycle_.scratch_regs));
temps.ExcludeFP(CPURegList(64, move_cycle_.scratch_fp_regs));
if (!IsFloatingPoint(rep)) {
if (temps.CanAcquire()) {
Register scratch = move_cycle_.temps->AcquireX();
move_cycle_.scratch_reg.emplace(scratch);
} else if (temps.CanAcquireFP()) {
// Try to use an FP register if no GP register is available for non-FP
// moves.
DoubleRegister scratch = move_cycle_.temps->AcquireD();
move_cycle_.scratch_reg.emplace(scratch);
}
} else if (rep == MachineRepresentation::kFloat32) {
VRegister scratch = move_cycle_.temps->AcquireS();
move_cycle_.scratch_reg.emplace(scratch);
} else if (rep == MachineRepresentation::kFloat64) {
VRegister scratch = move_cycle_.temps->AcquireD();
move_cycle_.scratch_reg.emplace(scratch);
} else if (rep == MachineRepresentation::kSimd128) {
VRegister scratch = move_cycle_.temps->AcquireQ();
move_cycle_.scratch_reg.emplace(scratch);
}
temps.Include(CPURegList(64, move_cycle_.scratch_regs));
temps.IncludeFP(CPURegList(64, move_cycle_.scratch_fp_regs));
if (move_cycle_.scratch_reg.has_value()) {
// A scratch register is available for this rep.
auto& scratch_reg = *move_cycle_.scratch_reg;
if (scratch_reg.IsD() && !IsFloatingPoint(rep)) {
AllocatedOperand scratch(LocationOperand::REGISTER,
MachineRepresentation::kFloat64,
scratch_reg.code());
Arm64OperandConverter g(this, nullptr);
if (source->IsStackSlot()) {
__ Ldr(g.ToDoubleRegister(&scratch), g.ToMemOperand(source, tasm()));
} else {
DCHECK(source->IsRegister());
__ fmov(g.ToDoubleRegister(&scratch), g.ToRegister(source));
}
} else {
AllocatedOperand scratch(LocationOperand::REGISTER, rep,
move_cycle_.scratch_reg->code());
AssembleMove(source, &scratch);
}
} else {
// The scratch registers are blocked by pending moves. Use the stack
// instead.
int new_slots = RoundUp<2>(ElementSizeInPointers(rep));
Arm64OperandConverter g(this, nullptr);
if (source->IsRegister()) {
__ Push(g.ToRegister(source), padreg);
} else if (source->IsStackSlot()) {
UseScratchRegisterScope temps2(tasm());
Register scratch = temps2.AcquireX();
__ Ldr(scratch, g.ToMemOperand(source, tasm()));
__ Push(scratch, padreg);
} else {
// No push instruction for this operand type. Bump the stack pointer and
// assemble the move.
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
__ Sub(sp, sp, Operand(new_slots * kSystemPointerSize));
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(source, &temp);
}
frame_access_state()->IncreaseSPDelta(new_slots);
}
}
void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
MachineRepresentation rep) {
if (move_cycle_.scratch_reg.has_value()) {
auto& scratch_reg = *move_cycle_.scratch_reg;
if (!IsFloatingPoint(rep) && scratch_reg.IsD()) {
// We used a D register to move a non-FP operand, change the
// representation to correctly interpret the InstructionOperand's code.
AllocatedOperand scratch(LocationOperand::REGISTER,
MachineRepresentation::kFloat64,
move_cycle_.scratch_reg->code());
Arm64OperandConverter g(this, nullptr);
if (dest->IsStackSlot()) {
__ Str(g.ToDoubleRegister(&scratch), g.ToMemOperand(dest, tasm()));
} else {
DCHECK(dest->IsRegister());
__ fmov(g.ToRegister(dest), g.ToDoubleRegister(&scratch));
}
} else {
AllocatedOperand scratch(LocationOperand::REGISTER, rep,
move_cycle_.scratch_reg->code());
AssembleMove(&scratch, dest);
}
} else {
int new_slots = RoundUp<2>(ElementSizeInPointers(rep));
frame_access_state()->IncreaseSPDelta(-new_slots);
Arm64OperandConverter g(this, nullptr);
if (dest->IsRegister()) {
__ Pop(padreg, g.ToRegister(dest));
} else if (dest->IsStackSlot()) {
UseScratchRegisterScope temps2(tasm());
Register scratch = temps2.AcquireX();
__ Pop(padreg, scratch);
__ Str(scratch, g.ToMemOperand(dest, tasm()));
} else {
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(&temp, dest);
__ Add(sp, sp, Operand(new_slots * kSystemPointerSize));
}
}
// Restore the default state to release the {UseScratchRegisterScope} and to
// prepare for the next cycle.
move_cycle_ = MoveCycleState();
}
void CodeGenerator::SetPendingMove(MoveOperands* move) {
auto move_type = MoveType::InferMove(&move->source(), &move->destination());
if (move_type == MoveType::kStackToStack) {
Arm64OperandConverter g(this, nullptr);
MemOperand src = g.ToMemOperand(&move->source(), tasm());
MemOperand dst = g.ToMemOperand(&move->destination(), tasm());
UseScratchRegisterScope temps(tasm());
if (move->source().IsSimd128StackSlot()) {
VRegister temp = temps.AcquireQ();
move_cycle_.scratch_fp_regs.set(temp);
} else {
Register temp = temps.AcquireX();
move_cycle_.scratch_regs.set(temp);
}
int64_t src_offset = src.offset();
unsigned src_size = CalcLSDataSize(LDR_x);
int64_t dst_offset = dst.offset();
unsigned dst_size = CalcLSDataSize(STR_x);
// Offset doesn't fit into the immediate field so the assembler will emit
// two instructions and use a second temp register.
if ((src.IsImmediateOffset() &&
!tasm()->IsImmLSScaled(src_offset, src_size) &&
!tasm()->IsImmLSUnscaled(src_offset)) ||
(dst.IsImmediateOffset() &&
!tasm()->IsImmLSScaled(dst_offset, dst_size) &&
!tasm()->IsImmLSUnscaled(dst_offset))) {
Register temp = temps.AcquireX();
move_cycle_.scratch_regs.set(temp);
}
}
}
void CodeGenerator::AssembleMove(InstructionOperand* source,
InstructionOperand* destination) {
Arm64OperandConverter g(this, nullptr);
......
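
The last check in SetPendingMove above reserves a second temp register when a stack-slot offset cannot be encoded directly in the ldr/str immediate field. Below is a rough standalone sketch of that predicate for a 64-bit access; the exact encodable ranges are assumptions about the ARM64 load/store encodings rather than something taken from this commit:

#include <cstdint>

// Returns true when the assembler would need an extra register to materialize
// the offset (mirrors !IsImmLSScaled(offset, 3) && !IsImmLSUnscaled(offset)).
bool OffsetNeedsExtraScratch(int64_t offset) {
  constexpr int kSizeLog2 = 3;  // 8-byte access, as for LDR_x / STR_x
  // Scaled form: unsigned 12-bit immediate, scaled by the access size.
  bool fits_scaled = offset >= 0 && (offset % (1 << kSizeLog2)) == 0 &&
                     (offset >> kSizeLog2) < (1 << 12);
  // Unscaled form: signed 9-bit byte offset.
  bool fits_unscaled = offset >= -256 && offset <= 255;
  return !fits_scaled && !fits_unscaled;
}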
......@@ -354,6 +354,10 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler {
InstructionOperand* destination) final;
void AssembleSwap(InstructionOperand* source,
InstructionOperand* destination) final;
void MoveToTempLocation(InstructionOperand* src) final;
void MoveTempLocationTo(InstructionOperand* dst,
MachineRepresentation rep) final;
void SetPendingMove(MoveOperands* move) final;
// ===========================================================================
// =================== Jump table construction methods. ======================
......@@ -473,6 +477,7 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler {
ZoneVector<int> block_starts_;
TurbolizerCodeOffsetsInfo offsets_info_;
ZoneVector<TurbolizerInstructionStartInfo> instr_starts_;
MoveCycleState move_cycle_;
const char* debug_name_ = nullptr;
};
......
This diff is collapsed.
......@@ -24,6 +24,19 @@ class GapResolver final {
// Assemble swap.
virtual void AssembleSwap(InstructionOperand* source,
InstructionOperand* destination) = 0;
// Assemble cycles.
// - {SetPendingMove} reserves scratch registers needed to perform the moves
// in the cycle.
// - {MoveToTempLocation} moves an operand to a temporary location, either
// a scratch register or a new stack slot, depending on the platform and the
// reserved registers.
// - {MoveTempLocationTo} moves the temp location to the destination,
// thereby completing the cycle.
virtual void MoveToTempLocation(InstructionOperand* src) = 0;
virtual void MoveTempLocationTo(InstructionOperand* dst,
MachineRepresentation rep) = 0;
virtual void SetPendingMove(MoveOperands* move) = 0;
};
explicit GapResolver(Assembler* assembler)
......@@ -36,6 +49,10 @@ class GapResolver final {
// Performs the given move, possibly performing other moves to unblock the
// destination operand.
void PerformMove(ParallelMove* moves, MoveOperands* move);
// Perform the move and its non-cyclic dependencies. Return the cycle if one
// is found.
base::Optional<std::vector<MoveOperands*>> PerformMoveHelper(
ParallelMove* moves, MoveOperands* move);
// Assembler used to emit moves and save registers.
Assembler* const assembler_;
......
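
The matching gap-resolver implementation changes are in the collapsed diff above. The following is only a rough sketch, under assumptions about that control flow, of how PerformMove could use PerformMoveHelper and the three new hooks for the simplest case of a two-move cycle; the real implementation handles cycles of any length and keeps the remaining moves correctly ordered:

void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) {
  // Perform the move and its non-cyclic dependencies first.
  auto cycle = PerformMoveHelper(moves, move);
  if (!cycle.has_value()) return;  // No cycle: everything was assembled.

  MoveOperands* m1 = (*cycle)[0];  // x -> y
  MoveOperands* m2 = (*cycle)[1];  // y -> x
  auto rep = LocationOperand::cast(&m1->source())->representation();

  // Reserve the scratch registers the pending move will need, so the temp
  // location picked below cannot collide with them.
  assembler_->SetPendingMove(m2);
  // Park x, perform y -> x, then complete x -> y from the temp location.
  assembler_->MoveToTempLocation(&m1->source());
  assembler_->AssembleMove(&m2->source(), &m2->destination());
  assembler_->MoveTempLocationTo(&m1->destination(), rep);
  m1->Eliminate();
  m2->Eliminate();
}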
......@@ -4221,6 +4221,79 @@ void CodeGenerator::FinishCode() {}
void CodeGenerator::PrepareForDeoptimizationExits(
ZoneDeque<DeoptimizationExit*>* exits) {}
void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
// Must be kept in sync with {MoveTempLocationTo}.
DCHECK(!source->IsImmediate());
auto rep = LocationOperand::cast(source)->representation();
if ((IsFloatingPoint(rep) &&
!move_cycle_.pending_double_scratch_register_use)) {
// The scratch double register is available.
AllocatedOperand scratch(LocationOperand::REGISTER, rep,
kScratchDoubleReg.code());
AssembleMove(source, &scratch);
} else {
// The scratch register is blocked by pending moves. Use the stack instead.
int new_slots = ElementSizeInPointers(rep);
IA32OperandConverter g(this, nullptr);
if (source->IsRegister()) {
__ push(g.ToRegister(source));
} else if (source->IsStackSlot() || source->IsFloatStackSlot()) {
__ push(g.ToOperand(source));
} else {
// No push instruction for this operand type. Bump the stack pointer and
// assemble the move.
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
__ sub(esp, Immediate(new_slots * kSystemPointerSize));
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(source, &temp);
}
frame_access_state()->IncreaseSPDelta(new_slots);
}
}
void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
MachineRepresentation rep) {
if (IsFloatingPoint(rep) &&
!move_cycle_.pending_double_scratch_register_use) {
AllocatedOperand scratch(LocationOperand::REGISTER, rep,
kScratchDoubleReg.code());
AssembleMove(&scratch, dest);
} else {
IA32OperandConverter g(this, nullptr);
int new_slots = ElementSizeInPointers(rep);
frame_access_state()->IncreaseSPDelta(-new_slots);
if (dest->IsRegister()) {
__ pop(g.ToRegister(dest));
} else if (dest->IsStackSlot() || dest->IsFloatStackSlot()) {
__ pop(g.ToOperand(dest));
} else {
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(&temp, dest);
__ add(esp, Immediate(new_slots * kSystemPointerSize));
}
}
move_cycle_ = MoveCycleState();
}
void CodeGenerator::SetPendingMove(MoveOperands* move) {
InstructionOperand* source = &move->source();
InstructionOperand* destination = &move->destination();
MoveType::Type move_type = MoveType::InferMove(source, destination);
if (move_type == MoveType::kStackToStack) {
if (!source->IsStackSlot()) {
move_cycle_.pending_double_scratch_register_use = true;
}
return;
}
}
void CodeGenerator::AssembleMove(InstructionOperand* source,
InstructionOperand* destination) {
IA32OperandConverter g(this, nullptr);
......
......@@ -19,6 +19,7 @@
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/execution/frame-constants.h"
#include "src/heap/memory-chunk.h"
#include "src/objects/code-kind.h"
#include "src/objects/smi.h"
......@@ -4908,10 +4909,99 @@ void CodeGenerator::IncrementStackAccessCounter(
}
}
void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
// Must be kept in sync with {MoveTempLocationTo}.
DCHECK(!source->IsImmediate());
auto rep = LocationOperand::cast(source)->representation();
if ((IsFloatingPoint(rep) &&
!move_cycle_.pending_double_scratch_register_use) ||
(!IsFloatingPoint(rep) && !move_cycle_.pending_scratch_register_use)) {
// The scratch register for this rep is available.
int scratch_reg_code = !IsFloatingPoint(rep) ? kScratchRegister.code()
: kScratchDoubleReg.code();
AllocatedOperand scratch(LocationOperand::REGISTER, rep, scratch_reg_code);
AssembleMove(source, &scratch);
} else {
// The scratch register is blocked by pending moves. Use the stack instead.
int new_slots = ElementSizeInPointers(rep);
X64OperandConverter g(this, nullptr);
if (source->IsRegister()) {
__ pushq(g.ToRegister(source));
} else if (source->IsStackSlot() || source->IsFloatStackSlot() ||
source->IsDoubleStackSlot()) {
__ pushq(g.ToOperand(source));
} else {
// No push instruction for xmm registers / 128-bit memory operands. Bump
// the stack pointer and assemble the move.
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
__ subq(rsp, Immediate(new_slots * kSystemPointerSize));
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(source, &temp);
}
frame_access_state()->IncreaseSPDelta(new_slots);
}
}
void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
MachineRepresentation rep) {
if ((IsFloatingPoint(rep) &&
!move_cycle_.pending_double_scratch_register_use) ||
(!IsFloatingPoint(rep) && !move_cycle_.pending_scratch_register_use)) {
int scratch_reg_code = !IsFloatingPoint(rep) ? kScratchRegister.code()
: kScratchDoubleReg.code();
AllocatedOperand scratch(LocationOperand::REGISTER, rep, scratch_reg_code);
AssembleMove(&scratch, dest);
} else {
X64OperandConverter g(this, nullptr);
int new_slots = ElementSizeInPointers(rep);
frame_access_state()->IncreaseSPDelta(-new_slots);
if (dest->IsRegister()) {
__ popq(g.ToRegister(dest));
} else if (dest->IsStackSlot() || dest->IsFloatStackSlot() ||
dest->IsDoubleStackSlot()) {
__ popq(g.ToOperand(dest));
} else {
int last_frame_slot_id =
frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
int sp_delta = frame_access_state_->sp_delta();
int temp_slot = last_frame_slot_id + sp_delta + new_slots;
AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
AssembleMove(&temp, dest);
__ addq(rsp, Immediate(new_slots * kSystemPointerSize));
}
}
move_cycle_ = MoveCycleState();
}
void CodeGenerator::SetPendingMove(MoveOperands* move) {
MoveType::Type move_type =
MoveType::InferMove(&move->source(), &move->destination());
if (move_type == MoveType::kConstantToStack) {
X64OperandConverter g(this, nullptr);
Constant src = g.ToConstant(&move->source());
if (move->destination().IsStackSlot() &&
(RelocInfo::IsWasmReference(src.rmode()) ||
(src.type() != Constant::kInt32 && src.type() != Constant::kInt64))) {
move_cycle_.pending_scratch_register_use = true;
}
} else if (move_type == MoveType::kStackToStack) {
if (move->source().IsFPLocationOperand()) {
move_cycle_.pending_double_scratch_register_use = true;
} else {
move_cycle_.pending_scratch_register_use = true;
}
}
}
void CodeGenerator::AssembleMove(InstructionOperand* source,
InstructionOperand* destination) {
X64OperandConverter g(this, nullptr);
// Helper function to write the given constant to the dst register.
// If a move type needs the scratch register, this also needs to be recorded
// in {SetPendingMove} to avoid conflicts with the gap resolver.
auto MoveConstantToRegister = [&](Register dst, Constant src) {
switch (src.type()) {
case Constant::kInt32: {
......
......@@ -73,6 +73,32 @@ class InterpreterState {
}
}
void MoveToTempLocation(InstructionOperand& source) {
scratch_ = KeyFor(source);
}
void MoveFromTempLocation(InstructionOperand& dst) {
AllocatedOperand src(scratch_.kind, scratch_.rep, scratch_.index);
if (kFPAliasing == AliasingKind::kCombine && src.IsFPLocationOperand() &&
dst.IsFPLocationOperand()) {
// Canonicalize FP location-location moves by fragmenting them into
// an equivalent sequence of float32 moves, to simplify state
// equivalence testing.
std::vector<InstructionOperand> src_fragments;
GetCanonicalOperands(src, &src_fragments);
CHECK(!src_fragments.empty());
std::vector<InstructionOperand> dst_fragments;
GetCanonicalOperands(dst, &dst_fragments);
CHECK_EQ(src_fragments.size(), dst_fragments.size());
for (size_t i = 0; i < src_fragments.size(); ++i) {
write(dst_fragments[i], KeyFor(src_fragments[i]));
}
return;
}
write(dst, scratch_);
}
bool operator==(const InterpreterState& other) const {
return values_ == other.values_;
}
......@@ -183,6 +209,7 @@ class InterpreterState {
}
OperandMap values_;
Key scratch_;
};
// An abstract interpreter for moves, swaps and parallel moves.
......@@ -190,13 +217,20 @@ class MoveInterpreter : public GapResolver::Assembler {
public:
explicit MoveInterpreter(Zone* zone) : zone_(zone) {}
void MoveToTempLocation(InstructionOperand* source) final {
state_.MoveToTempLocation(*source);
}
void MoveTempLocationTo(InstructionOperand* dest,
MachineRepresentation rep) final {
state_.MoveFromTempLocation(*dest);
}
void SetPendingMove(MoveOperands* move) final {}
void AssembleMove(InstructionOperand* source,
InstructionOperand* destination) override {
ParallelMove* moves = zone_->New<ParallelMove>(zone_);
moves->AddMove(*source, *destination);
state_.ExecuteInParallel(moves);
}
void AssembleSwap(InstructionOperand* source,
InstructionOperand* destination) override {
ParallelMove* moves = zone_->New<ParallelMove>(zone_);
......@@ -204,7 +238,6 @@ class MoveInterpreter : public GapResolver::Assembler {
moves->AddMove(*destination, *source);
state_.ExecuteInParallel(moves);
}
void AssembleParallelMove(const ParallelMove* moves) {
state_.ExecuteInParallel(moves);
}
......
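
A hypothetical usage sketch of the interpreter above (the state() accessor and the surrounding Zone/ParallelMove setup are assumed rather than shown in this diff): the test can compare the abstract effect of the resolver's emitted sequence, including the new temp-location moves, against the parallel semantics of the original moves.

// `zone` and `moves` are assumed to be set up by the surrounding test.
MoveInterpreter parallel_semantics(zone);
parallel_semantics.AssembleParallelMove(moves);  // parallel semantics of the moves

MoveInterpreter resolved_semantics(zone);
GapResolver resolver(&resolved_semantics);
resolver.Resolve(moves);  // drives AssembleMove / MoveToTempLocation / MoveTempLocationTo

CHECK(parallel_semantics.state() == resolved_semantics.state());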