Commit 610a3610 authored by Pierre Langlois, committed by Commit Bot

[arm] Introduce UseScratchRegisterScope for VFP registers

Replace hard-coded uses of `kScratchDoubleReg`, `kScratchDoubleReg2` and
`kScratchQuadReg` with the safer `UseScratchRegisterScope`. The reason for doing
this is to be able to safely use these scratch registers inside the assembler
without having to worry about the code generator using them too.

For instance, using this scope showed us that `TryInlineTruncateDoubleToI` is
using an FP scratch register while the caller, the `DoubleToI` stub, is using it
too. We are safe only because the stub passes the scratch register to
`TryInlineTruncateDoubleToI` as an input. Using the scope forces us to
explicitly use the input register instead of acquiring a new scratch.
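
For illustration, here is the new pattern as it lands in TurboAssembler::Swap
further down in this change (a fragment taken from the patch itself, not a
standalone program):

  // Before: hard-coded kScratchDoubleReg, with no protection against the
  // surrounding code already relying on d14.
  // After: ask the scope for a free VFP register; the scope hands it back
  // to the pool when it ends.
  UseScratchRegisterScope temps(this);
  DwVfpRegister scratch = temps.AcquireD();
  vmov(scratch, srcdst0);
  vmov(srcdst0, srcdst1);
  vmov(srcdst1, scratch);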

Bug: v8:6553
Change-Id: I84c53cd851d31ea33b0e3ef398d7a858b7e3e3c4
Reviewed-on: https://chromium-review.googlesource.com/895460
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Pierre Langlois <pierre.langlois@arm.com>
Cr-Commit-Position: refs/heads/master@{#51061}
parent 43ace493
......@@ -384,6 +384,35 @@ void Assembler::set_target_address_at(Isolate* isolate, Address pc,
EnsureSpace::EnsureSpace(Assembler* assembler) { assembler->CheckBuffer(); }
template <typename T>
bool UseScratchRegisterScope::CanAcquireVfp() const {
VfpRegList* available = assembler_->GetScratchVfpRegisterList();
DCHECK_NOT_NULL(available);
for (int index = 0; index < T::kNumRegisters; index++) {
T reg = T::from_code(index);
uint64_t mask = reg.ToVfpRegList();
if ((*available & mask) == mask) {
return true;
}
}
return false;
}
template <typename T>
T UseScratchRegisterScope::AcquireVfp() {
VfpRegList* available = assembler_->GetScratchVfpRegisterList();
DCHECK_NOT_NULL(available);
for (int index = 0; index < T::kNumRegisters; index++) {
T reg = T::from_code(index);
uint64_t mask = reg.ToVfpRegList();
if ((*available & mask) == mask) {
*available &= ~mask;
return reg;
}
}
UNREACHABLE();
}
} // namespace internal
} // namespace v8
......
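To make the mask test in CanAcquireVfp/AcquireVfp above concrete, here is a
small standalone model (plain C++, not V8 code; register codes and pool
contents are illustrative) of how `(*available & mask) == mask` picks the first
register whose bits are all still free and then removes it from the pool:

  #include <cassert>
  #include <cstdint>

  // Model of the VfpRegList pool: each S register owns one bit, so a D
  // register owns two adjacent bits and a Q register owns four.
  using VfpRegList = uint64_t;

  constexpr VfpRegList DMask(int code) { return uint64_t{0x3} << (code * 2); }

  // Mirrors AcquireVfp<DwVfpRegister>: scan register codes in order, take the
  // first register whose whole mask is still available, and clear those bits
  // so later acquires cannot return an overlapping register.
  int AcquireD(VfpRegList* available) {
    for (int code = 0; code < 32; code++) {
      VfpRegList mask = DMask(code);
      if ((*available & mask) == mask) {
        *available &= ~mask;
        return code;
      }
    }
    return -1;  // Nothing free; the real code hits UNREACHABLE().
  }

  int main() {
    // The pool initially holds d14 and d15, matching the Assembler constructor.
    VfpRegList pool = DMask(14) | DMask(15);
    assert(AcquireD(&pool) == 14);
    assert(AcquireD(&pool) == 15);
    assert(AcquireD(&pool) == -1);  // Pool exhausted.
    return 0;
  }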
......@@ -566,10 +566,16 @@ Assembler::Assembler(IsolateData isolate_data, void* buffer, int buffer_size)
// it's awkward to use CpuFeatures::VFP32DREGS with CpuFeatureScope. To make
// its use consistent with other features, we always enable it if we can.
EnableCpuFeature(VFP32DREGS);
// Make sure we pick two D registers which alias a Q register. This way, we
// can use a Q as a scratch if NEON is supported.
scratch_vfp_register_list_ = d14.ToVfpRegList() | d15.ToVfpRegList();
} else {
// When VFP32DREGS is not supported, d15 becomes allocatable. Therefore we
// cannot use it as a scratch.
scratch_vfp_register_list_ = d14.ToVfpRegList();
}
}
Assembler::~Assembler() {
DCHECK_EQ(const_pool_blocked_nesting_, 0);
DCHECK_EQ(code_target_sharing_blocked_nesting_, 0);
......@@ -1214,6 +1220,7 @@ void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
DCHECK(x.IsImmediate());
// Upon failure to encode, the opcode should not have changed.
DCHECK(opcode == (instr & kOpCodeMask));
UseScratchRegisterScope temps(this);
Condition cond = Instruction::ConditionField(instr);
if ((opcode == MOV) && !set_flags) {
// Generate a sequence of mov instructions or a load from the constant
......@@ -1221,7 +1228,7 @@ void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
DCHECK(!rn.is_valid());
Move32BitImmediate(rd, x, cond);
} else if ((opcode == ADD) && !set_flags && (rd == rn) &&
(scratch_register_list_ == 0)) {
!temps.CanAcquire()) {
// Split the operation into a sequence of additions if we cannot use a
// scratch register. In this case, we cannot re-use rn and the assembler
// does not have any scratch registers to spare.
......@@ -1244,7 +1251,6 @@ void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
// The immediate operand cannot be encoded as a shifter operand, so load
// it first to a scratch register and change the original instruction to
// use it.
UseScratchRegisterScope temps(this);
// Re-use the destination register if possible.
Register scratch =
(rd.is_valid() && rd != rn && rd != pc) ? rd : temps.Acquire();
......@@ -5478,19 +5484,23 @@ void PatchingAssembler::FlushICache(Isolate* isolate) {
}
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
: available_(assembler->GetScratchRegisterList()),
old_available_(*available_) {}
: assembler_(assembler),
old_available_(*assembler->GetScratchRegisterList()),
old_available_vfp_(*assembler->GetScratchVfpRegisterList()) {}
UseScratchRegisterScope::~UseScratchRegisterScope() {
*available_ = old_available_;
*assembler_->GetScratchRegisterList() = old_available_;
*assembler_->GetScratchVfpRegisterList() = old_available_vfp_;
}
Register UseScratchRegisterScope::Acquire() {
DCHECK_NOT_NULL(available_);
DCHECK_NE(*available_, 0);
int index = static_cast<int>(base::bits::CountTrailingZeros32(*available_));
*available_ &= ~(1UL << index);
return Register::from_code(index);
RegList* available = assembler_->GetScratchRegisterList();
DCHECK_NOT_NULL(available);
DCHECK_NE(*available, 0);
int index = static_cast<int>(base::bits::CountTrailingZeros32(*available));
Register reg = Register::from_code(index);
*available &= ~reg.bit();
return reg;
}
} // namespace internal
......
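The constructor and destructor above implement a simple snapshot-and-restore
protocol, which is what makes nested scopes safe. A minimal standalone model
(plain C++, not V8 code; names are illustrative):

  #include <cassert>
  #include <cstdint>

  struct Pools {
    uint32_t core = 0;  // stands in for scratch_register_list_
    uint64_t vfp = 0;   // stands in for scratch_vfp_register_list_
  };

  // Mirrors UseScratchRegisterScope: snapshot both pools on entry, hand out
  // bits while alive, and restore the snapshots on exit so anything acquired
  // inside the scope becomes available again.
  class ScratchScope {
   public:
    explicit ScratchScope(Pools* pools)
        : pools_(pools), old_core_(pools->core), old_vfp_(pools->vfp) {}
    ~ScratchScope() {
      pools_->core = old_core_;
      pools_->vfp = old_vfp_;
    }
    void AcquireVfpBits(uint64_t mask) {
      assert((pools_->vfp & mask) == mask);
      pools_->vfp &= ~mask;
    }

   private:
    Pools* pools_;
    uint32_t old_core_;
    uint64_t old_vfp_;
  };

  int main() {
    Pools pools;
    pools.vfp = uint64_t{0xF} << 28;  // say, d14 and d15 are free
    {
      ScratchScope outer(&pools);
      outer.AcquireVfpBits(uint64_t{0x3} << 28);  // take d14
      {
        ScratchScope inner(&pools);
        inner.AcquireVfpBits(uint64_t{0x3} << 30);  // take d15
      }  // d15 is free again here; d14 is still taken.
      assert(pools.vfp == (uint64_t{0x3} << 30));
    }
    assert(pools.vfp == (uint64_t{0xF} << 28));  // everything restored
    return 0;
  }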
......@@ -184,6 +184,17 @@ enum SwVfpRegisterCode {
kSwVfpAfterLast
};
// Representation of a list of non-overlapping VFP registers. This list
// represents the data layout of VFP registers as a bitfield:
// S registers cover 1 bit
// D registers cover 2 bits
// Q registers cover 4 bits
//
// This way, we make sure no registers in the list ever overlap. However, a list
// may represent multiple different sets of registers,
// e.g. [d0 s2 s3] <=> [s0 s1 d1].
typedef uint64_t VfpRegList;
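// A worked example of this layout (not part of the original patch; the values
// follow directly from the ToVfpRegList() definitions below):
//   s0 -> bit 0, s1 -> bit 1, ...            (1 bit per S register)
//   d0 -> bits 0-1, d14 -> bits 28-29, ...   (2 bits per D register)
//   q0 -> bits 0-3, q7  -> bits 28-31, ...   (4 bits per Q register)
// So the two default scratch D registers together cover exactly q7:
//   d14.ToVfpRegList() | d15.ToVfpRegList()
//     == (0x3 << 28) | (0x3 << 30) == 0xF << 28 == q7.ToVfpRegList()
// and the overlap example above reads, in bits, as:
//   [d0 s2 s3] == 0b1111 == [s0 s1 d1]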
// Single word VFP register.
class SwVfpRegister : public RegisterBase<SwVfpRegister, kSwVfpAfterLast> {
public:
......@@ -195,6 +206,11 @@ class SwVfpRegister : public RegisterBase<SwVfpRegister, kSwVfpAfterLast> {
*vm = reg_code >> 1;
}
void split_code(int* vm, int* m) const { split_code(code(), vm, m); }
VfpRegList ToVfpRegList() const {
DCHECK(is_valid());
// Each bit in the list corresponds to a S register.
return uint64_t{0x1} << code();
}
private:
friend class RegisterBase;
......@@ -217,10 +233,6 @@ enum DoubleRegisterCode {
// Double word VFP register.
class DwVfpRegister : public RegisterBase<DwVfpRegister, kDoubleAfterLast> {
public:
// A few double registers are reserved: one as a scratch register and one to
// hold 0.0, that does not fit in the immediate field of vmov instructions.
// d14: 0.0
// d15: scratch register.
static constexpr int kSizeInBytes = 8;
inline static int NumRegisters();
......@@ -231,6 +243,11 @@ class DwVfpRegister : public RegisterBase<DwVfpRegister, kDoubleAfterLast> {
*vm = reg_code & 0x0F;
}
void split_code(int* vm, int* m) const { split_code(code(), vm, m); }
VfpRegList ToVfpRegList() const {
DCHECK(is_valid());
// A D register overlaps two S registers.
return uint64_t{0x3} << (code() * 2);
}
private:
friend class RegisterBase;
......@@ -255,6 +272,11 @@ class LowDwVfpRegister
SwVfpRegister high() const {
return SwVfpRegister::from_code(code() * 2 + 1);
}
VfpRegList ToVfpRegList() const {
DCHECK(is_valid());
// A D register overlaps two S registers.
return uint64_t{0x3} << (code() * 2);
}
private:
friend class RegisterBase;
......@@ -282,6 +304,11 @@ class QwNeonRegister : public RegisterBase<QwNeonRegister, kSimd128AfterLast> {
DwVfpRegister high() const {
return DwVfpRegister::from_code(code() * 2 + 1);
}
VfpRegList ToVfpRegList() const {
DCHECK(is_valid());
// A Q register overlaps four S registers.
return uint64_t{0xf} << (code() * 4);
}
private:
friend class RegisterBase;
......@@ -334,12 +361,6 @@ SIMD128_REGISTERS(DECLARE_SIMD128_REGISTER)
constexpr LowDwVfpRegister kFirstCalleeSavedDoubleReg = d8;
constexpr LowDwVfpRegister kLastCalleeSavedDoubleReg = d15;
constexpr LowDwVfpRegister kDoubleRegZero = d13;
constexpr LowDwVfpRegister kScratchDoubleReg = d14;
// This scratch q-register aliases d14 (kScratchDoubleReg) and d15, but is only
// used if NEON is supported, which implies VFP32DREGS. When there are only 16
// d-registers, d15 is still allocatable.
constexpr QwNeonRegister kScratchQuadReg = q7;
constexpr LowDwVfpRegister kScratchDoubleReg2 = d15;
constexpr CRegister no_creg = CRegister::no_reg();
......@@ -685,6 +706,9 @@ class Assembler : public AssemblerBase {
// register.
static constexpr int kPcLoadDelta = 8;
RegList* GetScratchRegisterList() { return &scratch_register_list_; }
VfpRegList* GetScratchVfpRegisterList() {
return &scratch_vfp_register_list_;
}
// ---------------------------------------------------------------------------
// Code generation
......@@ -1655,6 +1679,7 @@ class Assembler : public AssemblerBase {
// Scratch registers available for use by the Assembler.
RegList scratch_register_list_;
VfpRegList scratch_vfp_register_list_;
private:
// Avoid overflows for displacements etc.
......@@ -1732,6 +1757,7 @@ class Assembler : public AssemblerBase {
friend class BlockConstPoolScope;
friend class BlockCodeTargetSharingScope;
friend class EnsureSpace;
friend class UseScratchRegisterScope;
// The following functions help with avoiding allocations of embedded heap
// objects during the code assembly phase. {RequestHeapObject} records the
......@@ -1779,12 +1805,38 @@ class UseScratchRegisterScope {
// Take a register from the list and return it.
Register Acquire();
SwVfpRegister AcquireS() { return AcquireVfp<SwVfpRegister>(); }
LowDwVfpRegister AcquireLowD() { return AcquireVfp<LowDwVfpRegister>(); }
DwVfpRegister AcquireD() {
DwVfpRegister reg = AcquireVfp<DwVfpRegister>();
DCHECK(assembler_->VfpRegisterIsAvailable(reg));
return reg;
}
QwNeonRegister AcquireQ() {
QwNeonRegister reg = AcquireVfp<QwNeonRegister>();
DCHECK(assembler_->VfpRegisterIsAvailable(reg));
return reg;
}
private:
// Currently available scratch registers.
RegList* available_;
friend class Assembler;
friend class TurboAssembler;
// Check if we have registers available to acquire.
// These methods are kept private intentionally to restrict their usage to the
assemblers. Choosing to emit a different instruction sequence depending on
// the availability of scratch registers is generally their job.
bool CanAcquire() const { return *assembler_->GetScratchRegisterList() != 0; }
template <typename T>
bool CanAcquireVfp() const;
template <typename T>
T AcquireVfp();
Assembler* assembler_;
// Available scratch registers at the start of this scope.
RegList old_available_;
VfpRegList old_available_vfp_;
};
} // namespace internal
......
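The reason CanAcquire() and CanAcquireVfp() are private is spelled out in the
comment above: choosing a different instruction sequence when no scratch is
free is the assembler's job. Condensed from the AddrMode1 hunk earlier in this
change (a fragment, not standalone; the elided bodies are also elided in the
hunk):

  UseScratchRegisterScope temps(this);
  Condition cond = Instruction::ConditionField(instr);
  if ((opcode == MOV) && !set_flags) {
    // Immediate mov: emit a mov sequence or a constant pool load.
    Move32BitImmediate(rd, x, cond);
  } else if ((opcode == ADD) && !set_flags && (rd == rn) &&
             !temps.CanAcquire()) {
    // No scratch register to spare and rn cannot be re-used: split the
    // operation into a sequence of additions instead (body elided).
  } else {
    // The immediate cannot be encoded as a shifter operand, so load it into
    // a scratch register first, re-using the destination when that is safe,
    // and then re-emit the original instruction with the scratch (elided).
    Register scratch =
        (rd.is_valid() && rd != rn && rd != pc) ? rd : temps.Acquire();
  }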
......@@ -46,7 +46,7 @@ void DoubleToIStub::Generate(MacroAssembler* masm) {
UseScratchRegisterScope temps(masm);
Register double_low = GetRegisterThatIsNotOneOf(result_reg);
Register double_high = GetRegisterThatIsNotOneOf(result_reg, double_low);
LowDwVfpRegister double_scratch = kScratchDoubleReg;
LowDwVfpRegister double_scratch = temps.AcquireLowD();
// Save the old values from these temporary registers on the stack.
__ Push(double_high, double_low);
......
......@@ -30,9 +30,6 @@ void Deoptimizer::TableEntryGenerator::Generate() {
const int kFloatRegsSize = kFloatSize * SwVfpRegister::kNumRegisters;
// Save all allocatable VFP registers before messing with them.
DCHECK_EQ(kDoubleRegZero.code(), 13);
DCHECK_EQ(kScratchDoubleReg.code(), 14);
{
// We use a run-time check for VFP32DREGS.
CpuFeatureScope scope(masm(), VFP32DREGS,
......
......@@ -307,11 +307,11 @@ void TurboAssembler::Swap(DwVfpRegister srcdst0, DwVfpRegister srcdst1) {
if (CpuFeatures::IsSupported(NEON)) {
vswp(srcdst0, srcdst1);
} else {
DCHECK_NE(srcdst0, kScratchDoubleReg);
DCHECK_NE(srcdst1, kScratchDoubleReg);
vmov(kScratchDoubleReg, srcdst0);
UseScratchRegisterScope temps(this);
DwVfpRegister scratch = temps.AcquireD();
vmov(scratch, srcdst0);
vmov(srcdst0, srcdst1);
vmov(srcdst1, kScratchDoubleReg);
vmov(srcdst1, scratch);
}
}
......@@ -809,11 +809,14 @@ void TurboAssembler::VmovExtended(int dst_code, int src_code) {
int dst_offset = dst_code & 1;
int src_offset = src_code & 1;
if (CpuFeatures::IsSupported(NEON)) {
UseScratchRegisterScope temps(this);
DwVfpRegister scratch = temps.AcquireD();
// On Neon we can shift and insert from d-registers.
if (src_offset == dst_offset) {
// Offsets are the same, use vdup to copy the source to the opposite lane.
vdup(Neon32, kScratchDoubleReg, src_d_reg, src_offset);
src_d_reg = kScratchDoubleReg;
vdup(Neon32, scratch, src_d_reg, src_offset);
// Here we are extending the lifetime of scratch.
src_d_reg = scratch;
src_offset = dst_offset ^ 1;
}
if (dst_offset) {
......@@ -834,27 +837,30 @@ void TurboAssembler::VmovExtended(int dst_code, int src_code) {
// Without Neon, use the scratch registers to move src and/or dst into
// s-registers.
int scratchSCode = kScratchDoubleReg.low().code();
int scratchSCode2 = kScratchDoubleReg2.low().code();
UseScratchRegisterScope temps(this);
LowDwVfpRegister d_scratch = temps.AcquireLowD();
LowDwVfpRegister d_scratch2 = temps.AcquireLowD();
int s_scratch_code = d_scratch.low().code();
int s_scratch_code2 = d_scratch2.low().code();
if (src_code < SwVfpRegister::kNumRegisters) {
// src is an s-register, dst is not.
vmov(kScratchDoubleReg, dst_d_reg);
vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
vmov(d_scratch, dst_d_reg);
vmov(SwVfpRegister::from_code(s_scratch_code + dst_offset),
SwVfpRegister::from_code(src_code));
vmov(dst_d_reg, kScratchDoubleReg);
vmov(dst_d_reg, d_scratch);
} else if (dst_code < SwVfpRegister::kNumRegisters) {
// dst is an s-register, src is not.
vmov(kScratchDoubleReg, src_d_reg);
vmov(d_scratch, src_d_reg);
vmov(SwVfpRegister::from_code(dst_code),
SwVfpRegister::from_code(scratchSCode + src_offset));
SwVfpRegister::from_code(s_scratch_code + src_offset));
} else {
// Neither src nor dst is an s-register. Both scratch double registers are
// available when there are 32 VFP registers.
vmov(kScratchDoubleReg, src_d_reg);
vmov(kScratchDoubleReg2, dst_d_reg);
vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
SwVfpRegister::from_code(scratchSCode2 + src_offset));
vmov(dst_d_reg, kScratchQuadReg.high());
vmov(d_scratch, src_d_reg);
vmov(d_scratch2, dst_d_reg);
vmov(SwVfpRegister::from_code(s_scratch_code + dst_offset),
SwVfpRegister::from_code(s_scratch_code2 + src_offset));
vmov(dst_d_reg, d_scratch2);
}
}
......@@ -862,11 +868,13 @@ void TurboAssembler::VmovExtended(int dst_code, const MemOperand& src) {
if (dst_code < SwVfpRegister::kNumRegisters) {
vldr(SwVfpRegister::from_code(dst_code), src);
} else {
UseScratchRegisterScope temps(this);
LowDwVfpRegister scratch = temps.AcquireLowD();
// TODO(bbudge) If Neon supported, use load single lane form of vld1.
int dst_s_code = kScratchDoubleReg.low().code() + (dst_code & 1);
vmov(kScratchDoubleReg, DwVfpRegister::from_code(dst_code / 2));
int dst_s_code = scratch.low().code() + (dst_code & 1);
vmov(scratch, DwVfpRegister::from_code(dst_code / 2));
vldr(SwVfpRegister::from_code(dst_s_code), src);
vmov(DwVfpRegister::from_code(dst_code / 2), kScratchDoubleReg);
vmov(DwVfpRegister::from_code(dst_code / 2), scratch);
}
}
......@@ -875,8 +883,10 @@ void TurboAssembler::VmovExtended(const MemOperand& dst, int src_code) {
vstr(SwVfpRegister::from_code(src_code), dst);
} else {
// TODO(bbudge) If Neon supported, use store single lane form of vst1.
int src_s_code = kScratchDoubleReg.low().code() + (src_code & 1);
vmov(kScratchDoubleReg, DwVfpRegister::from_code(src_code / 2));
UseScratchRegisterScope temps(this);
LowDwVfpRegister scratch = temps.AcquireLowD();
int src_s_code = scratch.low().code() + (src_code & 1);
vmov(scratch, DwVfpRegister::from_code(src_code / 2));
vstr(SwVfpRegister::from_code(src_s_code), dst);
}
}
......@@ -1599,13 +1609,22 @@ void MacroAssembler::TryDoubleToInt32Exact(Register result,
void TurboAssembler::TryInlineTruncateDoubleToI(Register result,
DwVfpRegister double_input,
Label* done) {
LowDwVfpRegister double_scratch = kScratchDoubleReg;
vcvt_s32_f64(double_scratch.low(), double_input);
vmov(result, double_scratch.low());
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
SwVfpRegister single_scratch = SwVfpRegister::no_reg();
if (temps.CanAcquireVfp<SwVfpRegister>()) {
single_scratch = temps.AcquireS();
} else {
// Re-use the input as a scratch register. However, we can only do this if
// the input register is d0-d15 as there are no s32+ registers.
DCHECK_LT(double_input.code(), LowDwVfpRegister::kNumRegisters);
LowDwVfpRegister double_scratch =
LowDwVfpRegister::from_code(double_input.code());
single_scratch = double_scratch.low();
}
vcvt_s32_f64(single_scratch, double_input);
vmov(result, single_scratch);
Register scratch = temps.Acquire();
// If result is not saturated (0x7FFFFFFF or 0x80000000), we are done.
sub(scratch, result, Operand(1));
cmp(scratch, Operand(0x7FFFFFFE));
......
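The saturation check at the end of TryInlineTruncateDoubleToI works because
subtracting 1 maps the two saturated conversion results, and only those two,
next to the comparison constant. A standalone illustration (plain C++, not V8
code; the branch consuming the cmp is cut off in the hunk above, so this only
shows what the sub/cmp pair establishes):

  #include <cassert>
  #include <cstdint>

  // vcvt_s32_f64 saturates out-of-range doubles to INT32_MAX (0x7FFFFFFF) or
  // INT32_MIN (0x80000000). Subtracting 1 maps exactly those two values to
  // 0x7FFFFFFE and 0x7FFFFFFF, so a single comparison of the decremented
  // result against 0x7FFFFFFE is enough to separate possibly-saturated
  // results (slow path) from results that are certainly exact (done).
  bool MaybeSaturated(uint32_t result) {
    uint32_t decremented = result - 1u;
    return decremented == 0x7FFFFFFEu || decremented == 0x7FFFFFFFu;
  }

  int main() {
    assert(MaybeSaturated(0x7FFFFFFFu));   // INT32_MAX
    assert(MaybeSaturated(0x80000000u));   // INT32_MIN
    assert(!MaybeSaturated(42u));          // ordinary positive result
    assert(!MaybeSaturated(0xFFFFFFF0u));  // -16, an ordinary negative result
    return 0;
  }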
......@@ -3923,6 +3923,101 @@ TEST(use_scratch_register_scope) {
CHECK_EQ(*assm.GetScratchRegisterList(), ip.bit());
}
TEST(use_scratch_vfp_register_scope) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
Assembler assm(isolate, nullptr, 0);
VfpRegList orig_scratches = *assm.GetScratchVfpRegisterList();
if (CpuFeatures::IsSupported(VFP32DREGS)) {
CHECK_EQ(orig_scratches, d14.ToVfpRegList() | d15.ToVfpRegList());
} else {
CHECK_EQ(orig_scratches, d14.ToVfpRegList());
}
// Test each configuration of scratch registers we can have at the same time.
{
UseScratchRegisterScope temps(&assm);
SwVfpRegister s1_scratch = temps.AcquireS();
CHECK_EQ(s1_scratch, s28);
SwVfpRegister s2_scratch = temps.AcquireS();
CHECK_EQ(s2_scratch, s29);
if (CpuFeatures::IsSupported(VFP32DREGS)) {
SwVfpRegister s3_scratch = temps.AcquireS();
CHECK_EQ(s3_scratch, s30);
SwVfpRegister s4_scratch = temps.AcquireS();
CHECK_EQ(s4_scratch, s31);
}
}
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
{
UseScratchRegisterScope temps(&assm);
SwVfpRegister s1_scratch = temps.AcquireS();
CHECK_EQ(s1_scratch, s28);
SwVfpRegister s2_scratch = temps.AcquireS();
CHECK_EQ(s2_scratch, s29);
if (CpuFeatures::IsSupported(VFP32DREGS)) {
DwVfpRegister d_scratch = temps.AcquireD();
CHECK_EQ(d_scratch, d15);
}
}
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
{
UseScratchRegisterScope temps(&assm);
DwVfpRegister d_scratch = temps.AcquireD();
CHECK_EQ(d_scratch, d14);
if (CpuFeatures::IsSupported(VFP32DREGS)) {
SwVfpRegister s1_scratch = temps.AcquireS();
CHECK_EQ(s1_scratch, s30);
SwVfpRegister s2_scratch = temps.AcquireS();
CHECK_EQ(s2_scratch, s31);
}
}
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
{
UseScratchRegisterScope temps(&assm);
DwVfpRegister d1_scratch = temps.AcquireD();
CHECK_EQ(d1_scratch, d14);
if (CpuFeatures::IsSupported(VFP32DREGS)) {
DwVfpRegister d2_scratch = temps.AcquireD();
CHECK_EQ(d2_scratch, d15);
}
}
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
if (CpuFeatures::IsSupported(NEON)) {
UseScratchRegisterScope temps(&assm);
QwNeonRegister q_scratch = temps.AcquireQ();
CHECK_EQ(q_scratch, q7);
}
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
}
TEST(split_add_immediate) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
......