ARM64: Support arbitrary offset in load/store pair.

TF calls can generate code exceeding the instruction range.

BUG=
R=bmeurer@chromium.org

Review URL: https://codereview.chromium.org/440303004

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22969 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 241c1311
......@@ -2503,6 +2503,12 @@ bool Assembler::IsImmLSScaled(ptrdiff_t offset, LSDataSize size) {
}
bool Assembler::IsImmLSPair(ptrdiff_t offset, LSDataSize size) {
bool offset_is_size_multiple = (((offset >> size) << size) == offset);
return offset_is_size_multiple && is_int7(offset >> size);
}
// Test if a given value can be encoded in the immediate field of a logical
// instruction.
// If it can be encoded, the function returns true, and values pointed to by n,
......
......@@ -1945,6 +1945,10 @@ class Assembler : public AssemblerBase {
static bool IsImmLSUnscaled(ptrdiff_t offset);
static bool IsImmLSScaled(ptrdiff_t offset, LSDataSize size);
void LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
const MemOperand& addr, LoadStorePairOp op);
static bool IsImmLSPair(ptrdiff_t offset, LSDataSize size);
void Logical(const Register& rd,
const Register& rn,
const Operand& operand,
......@@ -2027,10 +2031,6 @@ class Assembler : public AssemblerBase {
const Operand& operand,
FlagsUpdate S,
Instr op);
void LoadStorePair(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& addr,
LoadStorePairOp op);
void LoadStorePairNonTemporal(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& addr,
......
......@@ -299,6 +299,16 @@ LS_MACRO_LIST(DEFINE_FUNCTION)
#undef DEFINE_FUNCTION
#define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
void MacroAssembler::FN(const REGTYPE REG, const REGTYPE REG2, \
const MemOperand& addr) { \
DCHECK(allow_macro_instructions_); \
LoadStorePairMacro(REG, REG2, addr, OP); \
}
LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
#undef DEFINE_FUNCTION
void MacroAssembler::Asr(const Register& rd,
const Register& rn,
unsigned shift) {
......@@ -861,25 +871,6 @@ void MacroAssembler::Ldnp(const CPURegister& rt,
}
void MacroAssembler::Ldp(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& src) {
DCHECK(allow_macro_instructions_);
DCHECK(!AreAliased(rt, rt2));
ldp(rt, rt2, src);
}
void MacroAssembler::Ldpsw(const Register& rt,
const Register& rt2,
const MemOperand& src) {
DCHECK(allow_macro_instructions_);
DCHECK(!rt.IsZero());
DCHECK(!rt2.IsZero());
ldpsw(rt, rt2, src);
}
void MacroAssembler::Ldr(const CPURegister& rt, const Immediate& imm) {
DCHECK(allow_macro_instructions_);
ldr(rt, imm);
......@@ -1136,14 +1127,6 @@ void MacroAssembler::Stnp(const CPURegister& rt,
}
void MacroAssembler::Stp(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& dst) {
DCHECK(allow_macro_instructions_);
stp(rt, rt2, dst);
}
void MacroAssembler::Sxtb(const Register& rd, const Register& rn) {
DCHECK(allow_macro_instructions_);
DCHECK(!rd.IsZero());
......
......@@ -588,6 +588,39 @@ void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
}
}
void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& addr,
LoadStorePairOp op) {
// TODO(all): Should we support register offset for load-store-pair?
DCHECK(!addr.IsRegisterOffset());
int64_t offset = addr.offset();
LSDataSize size = CalcLSPairDataSize(op);
// Check if the offset fits in the immediate field of the appropriate
// instruction. If not, emit two instructions to perform the operation.
if (IsImmLSPair(offset, size)) {
// Encodable in one load/store pair instruction.
LoadStorePair(rt, rt2, addr, op);
} else {
Register base = addr.base();
if (addr.IsImmediateOffset()) {
UseScratchRegisterScope temps(this);
Register temp = temps.AcquireSameSizeAs(base);
Add(temp, base, offset);
LoadStorePair(rt, rt2, MemOperand(temp), op);
} else if (addr.IsPostIndex()) {
LoadStorePair(rt, rt2, MemOperand(base), op);
Add(base, base, offset);
} else {
DCHECK(addr.IsPreIndex());
Add(base, base, offset);
LoadStorePair(rt, rt2, MemOperand(base), op);
}
}
}
void MacroAssembler::Load(const Register& rt,
const MemOperand& addr,
......
......@@ -43,6 +43,11 @@ namespace internal {
V(Str, CPURegister&, rt, StoreOpFor(rt)) \
V(Ldrsw, Register&, rt, LDRSW_x)
#define LSPAIR_MACRO_LIST(V) \
V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2)) \
V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \
V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x)
// ----------------------------------------------------------------------------
// Static helper functions
......@@ -261,6 +266,14 @@ class MacroAssembler : public Assembler {
const MemOperand& addr,
LoadStoreOp op);
#define DECLARE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
inline void FN(const REGTYPE REG, const REGTYPE REG2, const MemOperand& addr);
LSPAIR_MACRO_LIST(DECLARE_FUNCTION)
#undef DECLARE_FUNCTION
void LoadStorePairMacro(const CPURegister& rt, const CPURegister& rt2,
const MemOperand& addr, LoadStorePairOp op);
// V8-specific load/store helpers.
void Load(const Register& rt, const MemOperand& addr, Representation r);
void Store(const Register& rt, const MemOperand& addr, Representation r);
......@@ -418,12 +431,6 @@ class MacroAssembler : public Assembler {
inline void Ldnp(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& src);
inline void Ldp(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& src);
inline void Ldpsw(const Register& rt,
const Register& rt2,
const MemOperand& src);
// Load a literal from the inline constant pool.
inline void Ldr(const CPURegister& rt, const Immediate& imm);
// Helper function for double immediate.
......@@ -483,9 +490,6 @@ class MacroAssembler : public Assembler {
inline void Stnp(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& dst);
inline void Stp(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& dst);
inline void Sxtb(const Register& rd, const Register& rn);
inline void Sxth(const Register& rd, const Register& rn);
inline void Sxtw(const Register& rd, const Register& rn);
......
......@@ -2911,6 +2911,64 @@ TEST(ldp_stp_offset) {
}
TEST(ldp_stp_offset_wide) {
INIT_V8();
SETUP();
uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff,
0xffeeddccbbaa9988};
uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
// Move base too far from the array to force multiple instructions
// to be emitted.
const int64_t base_offset = 1024;
START();
__ Mov(x20, src_base - base_offset);
__ Mov(x21, dst_base - base_offset);
__ Mov(x18, src_base + base_offset + 24);
__ Mov(x19, dst_base + base_offset + 56);
__ Ldp(w0, w1, MemOperand(x20, base_offset));
__ Ldp(w2, w3, MemOperand(x20, base_offset + 4));
__ Ldp(x4, x5, MemOperand(x20, base_offset + 8));
__ Ldp(w6, w7, MemOperand(x18, -12 - base_offset));
__ Ldp(x8, x9, MemOperand(x18, -16 - base_offset));
__ Stp(w0, w1, MemOperand(x21, base_offset));
__ Stp(w2, w3, MemOperand(x21, base_offset + 8));
__ Stp(x4, x5, MemOperand(x21, base_offset + 16));
__ Stp(w6, w7, MemOperand(x19, -24 - base_offset));
__ Stp(x8, x9, MemOperand(x19, -16 - base_offset));
END();
RUN();
CHECK_EQUAL_64(0x44556677, x0);
CHECK_EQUAL_64(0x00112233, x1);
CHECK_EQUAL_64(0x0011223344556677UL, dst[0]);
CHECK_EQUAL_64(0x00112233, x2);
CHECK_EQUAL_64(0xccddeeff, x3);
CHECK_EQUAL_64(0xccddeeff00112233UL, dst[1]);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, x4);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, dst[2]);
CHECK_EQUAL_64(0xffeeddccbbaa9988UL, x5);
CHECK_EQUAL_64(0xffeeddccbbaa9988UL, dst[3]);
CHECK_EQUAL_64(0x8899aabb, x6);
CHECK_EQUAL_64(0xbbaa9988, x7);
CHECK_EQUAL_64(0xbbaa99888899aabbUL, dst[4]);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, x8);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, dst[5]);
CHECK_EQUAL_64(0xffeeddccbbaa9988UL, x9);
CHECK_EQUAL_64(0xffeeddccbbaa9988UL, dst[6]);
CHECK_EQUAL_64(src_base - base_offset, x20);
CHECK_EQUAL_64(dst_base - base_offset, x21);
CHECK_EQUAL_64(src_base + base_offset + 24, x18);
CHECK_EQUAL_64(dst_base + base_offset + 56, x19);
TEARDOWN();
}
TEST(ldnp_stnp_offset) {
INIT_V8();
SETUP();
......@@ -3021,6 +3079,69 @@ TEST(ldp_stp_preindex) {
}
TEST(ldp_stp_preindex_wide) {
INIT_V8();
SETUP();
uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff,
0xffeeddccbbaa9988};
uint64_t dst[5] = {0, 0, 0, 0, 0};
uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
// Move base too far from the array to force multiple instructions
// to be emitted.
const int64_t base_offset = 1024;
START();
__ Mov(x24, src_base - base_offset);
__ Mov(x25, dst_base + base_offset);
__ Mov(x18, dst_base + base_offset + 16);
__ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PreIndex));
__ Mov(x19, x24);
__ Mov(x24, src_base - base_offset + 4);
__ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PreIndex));
__ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PreIndex));
__ Mov(x20, x25);
__ Mov(x25, dst_base + base_offset + 4);
__ Mov(x24, src_base - base_offset);
__ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PreIndex));
__ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PreIndex));
__ Mov(x21, x24);
__ Mov(x24, src_base - base_offset + 8);
__ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PreIndex));
__ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PreIndex));
__ Mov(x22, x18);
__ Mov(x18, dst_base + base_offset + 16 + 8);
__ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PreIndex));
END();
RUN();
CHECK_EQUAL_64(0x00112233, x0);
CHECK_EQUAL_64(0xccddeeff, x1);
CHECK_EQUAL_64(0x44556677, x2);
CHECK_EQUAL_64(0x00112233, x3);
CHECK_EQUAL_64(0xccddeeff00112233UL, dst[0]);
CHECK_EQUAL_64(0x0000000000112233UL, dst[1]);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, x4);
CHECK_EQUAL_64(0xffeeddccbbaa9988UL, x5);
CHECK_EQUAL_64(0x0011223344556677UL, x6);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, x7);
CHECK_EQUAL_64(0xffeeddccbbaa9988UL, dst[2]);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, dst[3]);
CHECK_EQUAL_64(0x0011223344556677UL, dst[4]);
CHECK_EQUAL_64(src_base, x24);
CHECK_EQUAL_64(dst_base, x25);
CHECK_EQUAL_64(dst_base + 16, x18);
CHECK_EQUAL_64(src_base + 4, x19);
CHECK_EQUAL_64(dst_base + 4, x20);
CHECK_EQUAL_64(src_base + 8, x21);
CHECK_EQUAL_64(dst_base + 24, x22);
TEARDOWN();
}
TEST(ldp_stp_postindex) {
INIT_V8();
SETUP();
......@@ -3076,6 +3197,69 @@ TEST(ldp_stp_postindex) {
}
TEST(ldp_stp_postindex_wide) {
INIT_V8();
SETUP();
uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff, 0xffeeddccbbaa9988,
0x7766554433221100};
uint64_t dst[5] = {0, 0, 0, 0, 0};
uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
// Move base too far from the array to force multiple instructions
// to be emitted.
const int64_t base_offset = 1024;
START();
__ Mov(x24, src_base);
__ Mov(x25, dst_base);
__ Mov(x18, dst_base + 16);
__ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PostIndex));
__ Mov(x19, x24);
__ Sub(x24, x24, base_offset);
__ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PostIndex));
__ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PostIndex));
__ Mov(x20, x25);
__ Sub(x24, x24, base_offset);
__ Add(x25, x25, base_offset);
__ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PostIndex));
__ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PostIndex));
__ Mov(x21, x24);
__ Sub(x24, x24, base_offset);
__ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PostIndex));
__ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PostIndex));
__ Mov(x22, x18);
__ Add(x18, x18, base_offset);
__ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PostIndex));
END();
RUN();
CHECK_EQUAL_64(0x44556677, x0);
CHECK_EQUAL_64(0x00112233, x1);
CHECK_EQUAL_64(0x00112233, x2);
CHECK_EQUAL_64(0xccddeeff, x3);
CHECK_EQUAL_64(0x4455667700112233UL, dst[0]);
CHECK_EQUAL_64(0x0000000000112233UL, dst[1]);
CHECK_EQUAL_64(0x0011223344556677UL, x4);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, x5);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, x6);
CHECK_EQUAL_64(0xffeeddccbbaa9988UL, x7);
CHECK_EQUAL_64(0xffeeddccbbaa9988UL, dst[2]);
CHECK_EQUAL_64(0x8899aabbccddeeffUL, dst[3]);
CHECK_EQUAL_64(0x0011223344556677UL, dst[4]);
CHECK_EQUAL_64(src_base + base_offset, x24);
CHECK_EQUAL_64(dst_base - base_offset, x25);
CHECK_EQUAL_64(dst_base - base_offset + 16, x18);
CHECK_EQUAL_64(src_base + base_offset + 4, x19);
CHECK_EQUAL_64(dst_base - base_offset + 4, x20);
CHECK_EQUAL_64(src_base + base_offset + 8, x21);
CHECK_EQUAL_64(dst_base - base_offset + 24, x22);
TEARDOWN();
}
TEST(ldp_sign_extend) {
INIT_V8();
SETUP();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment