Commit 1634631a authored by jacob.bramley@arm.com's avatar jacob.bramley@arm.com

A64: Tidy up Push and Pop TODOs.

This addresses several TODOs:
  - Push and Pop requests can be queued up so that arrays of Registers
    can be pushed efficiently, with just one PrepareForPush/Pop.
  - PushMultipleTimes now takes an Operand. This allows variable-length
    arguments arrays to be initialized, for example.
  - A NoUseRealAbortsScope has been added to Abort so that
    AssertStackConsistency can be called from PrepareForPush without
    introducing infinite recursion.

BUG=
R=ulan@chromium.org

Review URL: https://codereview.chromium.org/169533002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19403 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent be731e6c
......@@ -523,11 +523,14 @@ void HydrogenCodeStub::GenerateLightweightMiss(MacroAssembler* masm) {
FrameScope scope(masm, StackFrame::INTERNAL);
ASSERT((descriptor->register_param_count_ == 0) ||
x0.Is(descriptor->register_params_[param_count - 1]));
// Push arguments
// TODO(jbramley): Try to push these in blocks.
MacroAssembler::PushPopQueue queue(masm);
for (int i = 0; i < param_count; ++i) {
__ Push(descriptor->register_params_[i]);
queue.Queue(descriptor->register_params_[i]);
}
queue.PushQueued();
ExternalReference miss = descriptor->miss_handler();
__ CallExternalReference(miss, descriptor->register_param_count_);
}
......
......@@ -183,7 +183,7 @@ void FullCodeGenerator::Generate() {
if (locals_count > 0) {
__ LoadRoot(x10, Heap::kUndefinedValueRootIndex);
__ PushMultipleTimes(locals_count, x10);
__ PushMultipleTimes(x10, locals_count);
}
}
......@@ -4625,20 +4625,11 @@ void FullCodeGenerator::EmitGeneratorResume(Expression *generator,
__ Ldr(w10, FieldMemOperand(x10,
SharedFunctionInfo::kFormalParameterCountOffset));
__ LoadRoot(the_hole, Heap::kTheHoleValueRootIndex);
// TODO(jbramley): Write a variant of PushMultipleTimes which takes a register
// instead of a constant count, and use it to replace this loop.
Label push_argument_holes, push_frame;
__ Bind(&push_argument_holes);
__ Subs(w10, w10, 1);
__ B(mi, &push_frame);
__ Push(the_hole);
__ B(&push_argument_holes);
__ PushMultipleTimes(the_hole, w10);
// Enter a new JavaScript frame, and initialize its slots as they were when
// the generator was suspended.
Label resume_frame;
__ Bind(&push_frame);
__ Bl(&resume_frame);
__ B(&done);
......@@ -4675,16 +4666,8 @@ void FullCodeGenerator::EmitGeneratorResume(Expression *generator,
// Otherwise, we push holes for the operand stack and call the runtime to fix
// up the stack and the handlers.
// TODO(jbramley): Write a variant of PushMultipleTimes which takes a register
// instead of a constant count, and use it to replace this loop.
Label push_operand_holes, call_resume;
__ Bind(&push_operand_holes);
__ Subs(operand_stack_size, operand_stack_size, 1);
__ B(mi, &call_resume);
__ Push(the_hole);
__ B(&push_operand_holes);
__ Bind(&call_resume);
__ PushMultipleTimes(the_hole, operand_stack_size);
__ Mov(x10, Operand(Smi::FromInt(resume_mode)));
__ Push(generator_object, result_register(), x10);
__ CallRuntime(Runtime::kResumeJSGeneratorObject, 3);
......
......@@ -1286,8 +1286,9 @@ void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
ASSERT(!csp.Is(sp_));
// TODO(jbramley): Several callers rely on this not using scratch registers,
// so we use the assembler directly here. However, this means that large
// immediate values of 'space' cannot be handled. Once we merge with V8, we
// should try to use the new scope that controls scratch register usage.
// immediate values of 'space' cannot be handled cleanly. Once we implement
// our flexible scratch register idea, we could greatly simplify this
// function.
InstructionAccurateScope scope(this);
if ((space.IsImmediate()) && !is_uint12(space.immediate())) {
// The subtract instruction supports a 12-bit immediate, shifted left by
......
......@@ -623,6 +623,56 @@ void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,
}
void MacroAssembler::PushPopQueue::PushQueued() {
if (queued_.empty()) return;
masm_->PrepareForPush(size_);
int count = queued_.size();
int index = 0;
while (index < count) {
// PushHelper can only handle registers with the same size and type, and it
// can handle only four at a time. Batch them up accordingly.
CPURegister batch[4] = {NoReg, NoReg, NoReg, NoReg};
int batch_index = 0;
do {
batch[batch_index++] = queued_[index++];
} while ((batch_index < 4) && (index < count) &&
batch[0].IsSameSizeAndType(queued_[index]));
masm_->PushHelper(batch_index, batch[0].SizeInBytes(),
batch[0], batch[1], batch[2], batch[3]);
}
queued_.clear();
}
void MacroAssembler::PushPopQueue::PopQueued() {
if (queued_.empty()) return;
masm_->PrepareForPop(size_);
int count = queued_.size();
int index = 0;
while (index < count) {
// PopHelper can only handle registers with the same size and type, and it
// can handle only four at a time. Batch them up accordingly.
CPURegister batch[4] = {NoReg, NoReg, NoReg, NoReg};
int batch_index = 0;
do {
batch[batch_index++] = queued_[index++];
} while ((batch_index < 4) && (index < count) &&
batch[0].IsSameSizeAndType(queued_[index]));
masm_->PopHelper(batch_index, batch[0].SizeInBytes(),
batch[0], batch[1], batch[2], batch[3]);
}
queued_.clear();
}
void MacroAssembler::PushCPURegList(CPURegList registers) {
int size = registers.RegisterSizeInBytes();
......@@ -668,7 +718,7 @@ void MacroAssembler::PopCPURegList(CPURegList registers) {
}
void MacroAssembler::PushMultipleTimes(int count, Register src) {
void MacroAssembler::PushMultipleTimes(CPURegister src, int count) {
int size = src.SizeInBytes();
PrepareForPush(count, size);
......@@ -703,6 +753,51 @@ void MacroAssembler::PushMultipleTimes(int count, Register src) {
}
void MacroAssembler::PushMultipleTimes(CPURegister src, Register count) {
PrepareForPush(Operand(count, UXTW, WhichPowerOf2(src.SizeInBytes())));
Register temp = AppropriateTempFor(count);
if (FLAG_optimize_for_size) {
Label loop, done;
Subs(temp, count, 1);
B(mi, &done);
// Push all registers individually, to save code size.
Bind(&loop);
Subs(temp, temp, 1);
PushHelper(1, src.SizeInBytes(), src, NoReg, NoReg, NoReg);
B(pl, &loop);
Bind(&done);
} else {
Label loop, leftover2, leftover1, done;
Subs(temp, count, 4);
B(mi, &leftover2);
// Push groups of four first.
Bind(&loop);
Subs(temp, temp, 4);
PushHelper(4, src.SizeInBytes(), src, src, src, src);
B(pl, &loop);
// Push groups of two.
Bind(&leftover2);
Tbz(count, 1, &leftover1);
PushHelper(2, src.SizeInBytes(), src, src, NoReg, NoReg);
// Push the last one (if required).
Bind(&leftover1);
Tbz(count, 0, &done);
PushHelper(1, src.SizeInBytes(), src, NoReg, NoReg, NoReg);
Bind(&done);
}
}
void MacroAssembler::PushHelper(int count, int size,
const CPURegister& src0,
const CPURegister& src1,
......@@ -784,30 +879,39 @@ void MacroAssembler::PopHelper(int count, int size,
}
void MacroAssembler::PrepareForPush(int count, int size) {
// TODO(jbramley): Use AssertStackConsistency here, if possible. See the
// AssertStackConsistency for details of why we can't at the moment.
void MacroAssembler::PrepareForPush(Operand total_size) {
AssertStackConsistency();
if (csp.Is(StackPointer())) {
// If the current stack pointer is csp, then it must be aligned to 16 bytes
// on entry and the total size of the specified registers must also be a
// multiple of 16 bytes.
ASSERT((count * size) % 16 == 0);
if (total_size.IsImmediate()) {
ASSERT((total_size.immediate() % 16) == 0);
}
// Don't check access size for non-immediate sizes. It's difficult to do
// well, and it will be caught by hardware (or the simulator) anyway.
} else {
// Even if the current stack pointer is not the system stack pointer (csp),
// the system stack pointer will still be modified in order to comply with
// ABI rules about accessing memory below the system stack pointer.
BumpSystemStackPointer(count * size);
BumpSystemStackPointer(total_size);
}
}
void MacroAssembler::PrepareForPop(int count, int size) {
void MacroAssembler::PrepareForPop(Operand total_size) {
AssertStackConsistency();
if (csp.Is(StackPointer())) {
// If the current stack pointer is csp, then it must be aligned to 16 bytes
// on entry and the total size of the specified registers must also be a
// multiple of 16 bytes.
ASSERT((count * size) % 16 == 0);
if (total_size.IsImmediate()) {
ASSERT((total_size.immediate() % 16) == 0);
}
// Don't check access size for non-immediate sizes. It's difficult to do
// well, and it will be caught by hardware (or the simulator) anyway.
}
}
......@@ -903,15 +1007,24 @@ void MacroAssembler::PopCalleeSavedRegisters() {
void MacroAssembler::AssertStackConsistency() {
if (emit_debug_code() && !csp.Is(StackPointer())) {
if (emit_debug_code()) {
if (csp.Is(StackPointer())) {
// TODO(jbramley): Check for csp alignment if it is the stack pointer.
} else {
// TODO(jbramley): Currently we cannot use this assertion in Push because
// some calling code assumes that the flags are preserved. For an example,
// look at Builtins::Generate_ArgumentsAdaptorTrampoline.
Cmp(csp, StackPointer());
Check(ls, kTheCurrentStackPointerIsBelowCsp);
// We can't check the alignment of csp without using a scratch register
// (or clobbering the flags), but the processor (or simulator) will abort
// if it is not properly aligned during a load.
ldr(xzr, MemOperand(csp, 0));
} else if (FLAG_enable_slow_asserts) {
Label ok;
// Check that csp <= StackPointer(), preserving all registers and NZCV.
sub(StackPointer(), csp, StackPointer());
cbz(StackPointer(), &ok); // Ok if csp == StackPointer().
tbnz(StackPointer(), kXSignBit, &ok); // Ok if csp < StackPointer().
Abort(kTheCurrentStackPointerIsBelowCsp);
bind(&ok);
// Restore StackPointer().
sub(StackPointer(), csp, StackPointer());
}
}
}
......@@ -4344,6 +4457,9 @@ void MacroAssembler::Abort(BailoutReason reason) {
Adr(x0, &msg_address);
if (use_real_aborts()) {
// Avoid infinite recursion; Push contains some assertions that use Abort.
NoUseRealAbortsScope no_real_aborts(this);
// Split the message pointer into two SMI to avoid the GC
// trying to scan the string.
STATIC_ASSERT((kSmiShift == 32) && (kSmiTag == 0));
......
......@@ -28,6 +28,8 @@
#ifndef V8_A64_MACRO_ASSEMBLER_A64_H_
#define V8_A64_MACRO_ASSEMBLER_A64_H_
#include <vector>
#include "v8globals.h"
#include "globals.h"
......@@ -517,7 +519,8 @@ class MacroAssembler : public Assembler {
}
// Push the specified register 'count' times.
void PushMultipleTimes(int count, Register src);
void PushMultipleTimes(CPURegister src, Register count);
void PushMultipleTimes(CPURegister src, int count);
// This is a convenience method for pushing a single Handle<Object>.
inline void Push(Handle<Object> handle);
......@@ -531,6 +534,35 @@ class MacroAssembler : public Assembler {
Pop(dst);
}
// Sometimes callers need to push or pop multiple registers in a way that is
// difficult to structure efficiently for fixed Push or Pop calls. This scope
// allows push requests to be queued up, then flushed at once. The
// MacroAssembler will try to generate the most efficient sequence required.
//
// Unlike the other Push and Pop macros, PushPopQueue can handle mixed sets of
// register sizes and types.
class PushPopQueue {
public:
explicit PushPopQueue(MacroAssembler* masm) : masm_(masm), size_(0) { }
~PushPopQueue() {
ASSERT(queued_.empty());
}
void Queue(const CPURegister& rt) {
size_ += rt.SizeInBytes();
queued_.push_back(rt);
}
void PushQueued();
void PopQueued();
private:
MacroAssembler* masm_;
int size_;
std::vector<CPURegister> queued_;
};
// Poke 'src' onto the stack. The offset is in bytes.
//
// If the current stack pointer (according to StackPointer()) is csp, then
......@@ -2001,9 +2033,12 @@ class MacroAssembler : public Assembler {
// Perform necessary maintenance operations before a push or pop.
//
// Note that size is per register, and is specified in bytes.
void PrepareForPush(int count, int size);
void PrepareForPop(int count, int size);
// Note that size is specified in bytes.
void PrepareForPush(Operand total_size);
void PrepareForPop(Operand total_size);
void PrepareForPush(int count, int size) { PrepareForPush(count * size); }
void PrepareForPop(int count, int size) { PrepareForPop(count * size); }
// Call Printf. On a native build, a simple call will be generated, but if the
// simulator is being used then a suitable pseudo-instruction is used. The
......
......@@ -754,14 +754,16 @@ static void GenerateFastApiCall(MacroAssembler* masm,
int argc,
Register* values) {
ASSERT(!AreAliased(receiver, scratch));
__ Push(receiver);
// Write the arguments to stack frame.
MacroAssembler::PushPopQueue queue(masm);
queue.Queue(receiver);
// Write the arguments to the stack frame.
for (int i = 0; i < argc; i++) {
// TODO(jbramley): Push these in as few Push() calls as possible.
Register arg = values[argc-1-i];
ASSERT(!AreAliased(receiver, scratch, arg));
__ Push(arg);
queue.Queue(arg);
}
queue.PushQueued();
ASSERT(optimization.is_simple_api_call());
......
......@@ -1332,6 +1332,7 @@ class MaybeObject BASE_EMBEDDED {
"The instruction to patch should be an ori") \
V(kTheSourceAndDestinationAreTheSame, \
"The source and destination are the same") \
V(kTheStackPointerIsNotAligned, "The stack pointer is not aligned.") \
V(kTheStackWasCorruptedByMacroAssemblerCall, \
"The stack was corrupted by MacroAssembler::Call()") \
V(kTooManyParametersLocals, "Too many parameters/locals") \
......
......@@ -8110,7 +8110,8 @@ static void PushPopJsspWXOverlapHelper(int reg_count, int claim) {
SETUP_SIZE(BUF_SIZE * 2);
// Work out which registers to use, based on reg_size.
static RegList const allowed = ~(x8.Bit() | x9.Bit() | jssp.Bit());
Register tmp = x8;
static RegList const allowed = ~(tmp.Bit() | jssp.Bit());
if (reg_count == kPushPopJsspMaxRegCount) {
reg_count = CountSetBits(allowed, kNumberOfRegisters);
}
......@@ -8196,7 +8197,13 @@ static void PushPopJsspWXOverlapHelper(int reg_count, int claim) {
int times = i % 4 + 1;
if (i & 1) {
// Push odd-numbered registers as W registers.
__ PushMultipleTimes(times, w[i]);
if (i & 2) {
__ PushMultipleTimes(w[i], times);
} else {
// Use a register to specify the count.
__ Mov(tmp.W(), times);
__ PushMultipleTimes(w[i], tmp.W());
}
// Fill in the expected stack slots.
for (int j = 0; j < times; j++) {
if (w[i].Is(wzr)) {
......@@ -8208,7 +8215,13 @@ static void PushPopJsspWXOverlapHelper(int reg_count, int claim) {
}
} else {
// Push even-numbered registers as X registers.
__ PushMultipleTimes(times, x[i]);
if (i & 2) {
__ PushMultipleTimes(x[i], times);
} else {
// Use a register to specify the count.
__ Mov(tmp, times);
__ PushMultipleTimes(x[i], tmp);
}
// Fill in the expected stack slots.
for (int j = 0; j < times; j++) {
if (x[i].IsZero()) {
......@@ -8399,6 +8412,156 @@ TEST(push_pop_csp) {
}
TEST(push_queued) {
INIT_V8();
SETUP();
START();
ASSERT(__ StackPointer().Is(csp));
__ Mov(jssp, __ StackPointer());
__ SetStackPointer(jssp);
MacroAssembler::PushPopQueue queue(&masm);
// Queue up registers.
queue.Queue(x0);
queue.Queue(x1);
queue.Queue(x2);
queue.Queue(x3);
queue.Queue(w4);
queue.Queue(w5);
queue.Queue(w6);
queue.Queue(d0);
queue.Queue(d1);
queue.Queue(s2);
__ Mov(x0, 0x1234000000000000);
__ Mov(x1, 0x1234000100010001);
__ Mov(x2, 0x1234000200020002);
__ Mov(x3, 0x1234000300030003);
__ Mov(w4, 0x12340004);
__ Mov(w5, 0x12340005);
__ Mov(w6, 0x12340006);
__ Fmov(d0, 123400.0);
__ Fmov(d1, 123401.0);
__ Fmov(s2, 123402.0);
// Actually push them.
queue.PushQueued();
Clobber(&masm, CPURegList(CPURegister::kRegister, kXRegSize, 0, 6));
Clobber(&masm, CPURegList(CPURegister::kFPRegister, kDRegSize, 0, 2));
// Pop them conventionally.
__ Pop(s2);
__ Pop(d1, d0);
__ Pop(w6, w5, w4);
__ Pop(x3, x2, x1, x0);
__ Mov(csp, __ StackPointer());
__ SetStackPointer(csp);
END();
RUN();
ASSERT_EQUAL_64(0x1234000000000000, x0);
ASSERT_EQUAL_64(0x1234000100010001, x1);
ASSERT_EQUAL_64(0x1234000200020002, x2);
ASSERT_EQUAL_64(0x1234000300030003, x3);
ASSERT_EQUAL_32(0x12340004, w4);
ASSERT_EQUAL_32(0x12340005, w5);
ASSERT_EQUAL_32(0x12340006, w6);
ASSERT_EQUAL_FP64(123400.0, d0);
ASSERT_EQUAL_FP64(123401.0, d1);
ASSERT_EQUAL_FP32(123402.0, s2);
TEARDOWN();
}
TEST(pop_queued) {
INIT_V8();
SETUP();
START();
ASSERT(__ StackPointer().Is(csp));
__ Mov(jssp, __ StackPointer());
__ SetStackPointer(jssp);
MacroAssembler::PushPopQueue queue(&masm);
__ Mov(x0, 0x1234000000000000);
__ Mov(x1, 0x1234000100010001);
__ Mov(x2, 0x1234000200020002);
__ Mov(x3, 0x1234000300030003);
__ Mov(w4, 0x12340004);
__ Mov(w5, 0x12340005);
__ Mov(w6, 0x12340006);
__ Fmov(d0, 123400.0);
__ Fmov(d1, 123401.0);
__ Fmov(s2, 123402.0);
// Push registers conventionally.
__ Push(x0, x1, x2, x3);
__ Push(w4, w5, w6);
__ Push(d0, d1);
__ Push(s2);
// Queue up a pop.
queue.Queue(s2);
queue.Queue(d1);
queue.Queue(d0);
queue.Queue(w6);
queue.Queue(w5);
queue.Queue(w4);
queue.Queue(x3);
queue.Queue(x2);
queue.Queue(x1);
queue.Queue(x0);
Clobber(&masm, CPURegList(CPURegister::kRegister, kXRegSize, 0, 6));
Clobber(&masm, CPURegList(CPURegister::kFPRegister, kDRegSize, 0, 2));
// Actually pop them.
queue.PopQueued();
__ Mov(csp, __ StackPointer());
__ SetStackPointer(csp);
END();
RUN();
ASSERT_EQUAL_64(0x1234000000000000, x0);
ASSERT_EQUAL_64(0x1234000100010001, x1);
ASSERT_EQUAL_64(0x1234000200020002, x2);
ASSERT_EQUAL_64(0x1234000300030003, x3);
ASSERT_EQUAL_64(0x0000000012340004, x4);
ASSERT_EQUAL_64(0x0000000012340005, x5);
ASSERT_EQUAL_64(0x0000000012340006, x6);
ASSERT_EQUAL_FP64(123400.0, d0);
ASSERT_EQUAL_FP64(123401.0, d1);
ASSERT_EQUAL_FP32(123402.0, s2);
TEARDOWN();
}
TEST(jump_both_smi) {
INIT_V8();
SETUP();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment