Commit aa46ebe5 authored by bmeurer, committed by Commit bot

[arm] Use position independent table switches.

Performance measurements show that the position-independent code is
usually slightly faster than the position-dependent code, and there
seems to be no noticeable regression.

This also gets rid of a lot of support code that was only required to allow
embedding labels into the code stream, and it means that neither the GC
nor the deserializer needs to do anything for jump tables.

R=svenpanne@chromium.org
BUG=v8:3872
LOG=n

Review URL: https://codereview.chromium.org/1069633002

Cr-Commit-Position: refs/heads/master@{#27646}
parent e91d6960
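In short, the dispatch sequence changes from loading an absolute case address straight into pc, which forces every table entry to be patched whenever the code moves, to a pc-relative add that lands on a table of branch instructions, which needs no fixups at all. A rough before/after sketch in the macro-assembler style of AssembleArchTableSwitch below (the surrounding constant-pool handling is omitted):

  // Before (position dependent): table of absolute label addresses.
  __ cmp(input, Operand(case_count));
  __ ldr(pc, MemOperand(pc, input, LSL, 2), lo);
  __ b(GetLabel(i.InputRpo(1)));                 // out-of-range default
  for (size_t index = 0; index < case_count; ++index) {
    __ dd(GetLabel(i.InputRpo(index + 2)));      // 32-bit absolute address
  }

  // After (position independent): table of ordinary branches.
  __ cmp(input, Operand(case_count));
  __ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
  __ b(GetLabel(i.InputRpo(1)));                 // out-of-range default
  for (size_t index = 0; index < case_count; ++index) {
    __ b(GetLabel(i.InputRpo(index + 2)));       // pc-relative branch per case
  }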
@@ -228,7 +228,7 @@ const char* DwVfpRegister::AllocationIndexToString(int index) {
// Implementation of RelocInfo
// static
const int RelocInfo::kApplyMask = 1 << RelocInfo::INTERNAL_REFERENCE;
const int RelocInfo::kApplyMask = 0;
bool RelocInfo::IsCodedSpecially() {
@@ -776,20 +776,14 @@ int Assembler::target_at(int pos) {
// Emitted link to a label, not part of a branch.
return instr;
}
if ((instr & 7 * B25) == 5 * B25) {
DCHECK_EQ(5 * B25, instr & 7 * B25); // b, bl, or blx imm24
int imm26 = ((instr & kImm24Mask) << 8) >> 6;
// b, bl, or blx imm24
if ((Instruction::ConditionField(instr) == kSpecialCondition) &&
((instr & B24) != 0)) {
// blx uses bit 24 to encode bit 2 of imm26
imm26 += 2;
}
return pos + kPcLoadDelta + imm26;
}
// Internal reference to the label.
DCHECK_EQ(7 * B25 | 1 * B0, instr & (7 * B25 | 1 * B0));
int imm26 = (((instr >> 1) & kImm24Mask) << 8) >> 6;
return pos + imm26;
}
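The shift pair in the decode above both sign-extends the 24-bit immediate and scales it to a byte offset: shifting left by 8 moves the sign bit of imm24 into bit 31, and the arithmetic shift right by 6 sign-extends while leaving a net shift of +2, i.e. a multiply by 4. A standalone sketch of that decode (kImm24Mask is redefined locally and only assumed to match V8's constant; the expression mirrors V8's own and relies on the usual two's-complement shift behavior):

  #include <cassert>
  #include <cstdint>

  // Recover the signed byte offset (imm26) from a branch instruction word.
  int32_t DecodeImm26(uint32_t instr) {
    const int32_t kImm24Mask = (1 << 24) - 1;
    return (static_cast<int32_t>(instr & kImm24Mask) << 8) >> 6;
  }

  int main() {
    // 0xEAFFFFFE is the classic "b ." (imm24 == -2): -8 bytes, which the
    // caller then offsets by kPcLoadDelta (+8) to land back on the branch.
    assert(DecodeImm26(0xEAFFFFFE) == -8);
    // imm24 == +1 is a forward offset of 4 bytes.
    assert(DecodeImm26(0xEA000001) == 4);
  }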
@@ -863,25 +857,19 @@ void Assembler::target_at_put(int pos, int target_pos) {
}
return;
}
if ((instr & 7 * B25) == 5 * B25) {
// b, bl, or blx imm24
int imm26 = target_pos - (pos + kPcLoadDelta);
DCHECK_EQ(5 * B25, instr & 7 * B25); // b, bl, or blx imm24
if (Instruction::ConditionField(instr) == kSpecialCondition) {
// blx uses bit 24 to encode bit 2 of imm26
DCHECK((imm26 & 1) == 0);
DCHECK_EQ(0, imm26 & 1);
instr = (instr & ~(B24 | kImm24Mask)) | ((imm26 & 2) >> 1) * B24;
} else {
DCHECK((imm26 & 3) == 0);
DCHECK_EQ(0, imm26 & 3);
instr &= ~kImm24Mask;
}
int imm24 = imm26 >> 2;
DCHECK(is_int24(imm24));
instr_at_put(pos, instr | (imm24 & kImm24Mask));
return;
}
// Patch internal reference to label.
DCHECK_EQ(7 * B25 | 1 * B0, instr & (7 * B25 | 1 * B0));
instr_at_put(pos, reinterpret_cast<Instr>(buffer_ + target_pos));
}
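Going the other way, target_at_put() packs the byte offset back into the low 24 bits, and blx additionally steals bit 24 of the instruction for the halfword part of the offset so that it can reach halfword-aligned Thumb targets. A simplified standalone sketch (constants redefined locally and only assumed to match V8's):

  #include <cstdint>

  // Re-encode a signed byte offset (imm26) into a b/bl/blx instruction word.
  uint32_t EncodeBranchOffset(uint32_t instr, int32_t imm26, bool is_blx) {
    const uint32_t kImm24Mask = (1u << 24) - 1;
    const uint32_t B24 = 1u << 24;
    if (is_blx) {
      // Halfword bit of the offset goes into bit 24; word bits into imm24.
      instr = (instr & ~(B24 | kImm24Mask)) | (((imm26 & 2) >> 1) * B24);
    } else {
      // b/bl require a word-aligned offset; only imm24 is rewritten.
      instr &= ~kImm24Mask;
    }
    return instr | ((imm26 >> 2) & kImm24Mask);
  }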
@@ -3561,16 +3549,9 @@ void Assembler::GrowBuffer() {
reloc_info_writer.Reposition(reloc_info_writer.pos() + rc_delta,
reloc_info_writer.last_pc() + pc_delta);
// Relocate internal references.
for (RelocIterator it(desc); !it.done(); it.next()) {
if (it.rinfo()->rmode() == RelocInfo::INTERNAL_REFERENCE) {
// Don't patch unbound internal references (bit 0 set); those are still
// hooked up in the Label chain and will be automatically patched once
// the label is bound.
int32_t* p = reinterpret_cast<int32_t*>(it.rinfo()->pc());
if ((*p & 1 * B0) == 0) *p += pc_delta;
}
}
// None of our relocation types are pc relative pointing outside the code
// buffer nor pc absolute pointing inside the code buffer, so there is no need
// to relocate any emitted relocation entries.
// Relocate pending relocation entries.
for (int i = 0; i < num_pending_32_bit_reloc_info_; i++) {
@@ -3614,37 +3595,6 @@ void Assembler::dd(uint32_t data) {
}
void Assembler::dd(Label* label) {
CheckBuffer();
RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
if (label->is_bound()) {
uint32_t data = reinterpret_cast<uint32_t>(buffer_ + label->pos());
DCHECK_EQ(0u, data & 1 * B0);
*reinterpret_cast<uint32_t*>(pc_) = data;
pc_ += sizeof(uint32_t);
} else {
int target_pos;
if (label->is_linked()) {
// Point to previous instruction that uses the link.
target_pos = label->pos();
} else {
// First entry of the link chain points to itself.
target_pos = pc_offset();
}
label->link_to(pc_offset());
// Encode internal reference to unbound label. We set the least significant
// bit to distinguish unbound internal references in GrowBuffer() below.
int imm26 = target_pos - pc_offset();
DCHECK_EQ(0, imm26 & 3);
int imm24 = imm26 >> 2;
DCHECK(is_int24(imm24));
// We use bit pattern 0000111<imm24>1 because that doesn't match any branch
// or load that would also appear on the label chain.
emit(7 * B25 | ((imm24 & kImm24Mask) << 1) | 1 * B0);
}
}
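For context on what is being removed here: an unbound label was recorded in the table as a marker word with the bit pattern 0000111<imm24>1, where imm24 is the word distance to the previous link in the label's chain and the set low bit tells GrowBuffer() to leave the entry alone. A worked sketch of that marker encoding (constants redefined locally and only assumed to match V8's):

  #include <cassert>
  #include <cstdint>

  const int32_t kImm24Mask = (1 << 24) - 1;
  const int32_t B25 = 1 << 25;

  // Encode an unbound internal reference. Bits 27..25 == 111 cannot collide
  // with the branch or load encodings that otherwise appear on the label
  // chain, and bit 0 set distinguishes it from a bound absolute address,
  // which is always 4-byte aligned.
  int32_t EncodeUnboundRef(int32_t imm26) {  // imm26: word-aligned byte distance
    int32_t imm24 = imm26 >> 2;
    return 7 * B25 | ((imm24 & kImm24Mask) << 1) | 1;
  }

  int main() {
    // A link 8 bytes back in the label chain (imm24 == -2) becomes 0x0FFFFFFD.
    assert(EncodeUnboundRef(-8) == 0x0FFFFFFD);
  }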
void Assembler::emit_code_stub_address(Code* stub) {
CheckBuffer();
*reinterpret_cast<uint32_t*>(pc_) =
......
@@ -1455,7 +1455,6 @@ class Assembler : public AssemblerBase {
// are not emitted as part of the tables generated.
void db(uint8_t data);
void dd(uint32_t data);
void dd(Label* label);
// Emits the address of the code stub's first instruction.
void emit_code_stub_address(Code* stub);
......
@@ -854,13 +854,14 @@ void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
ArmOperandConverter i(this, instr);
Register input = i.InputRegister(0);
size_t const case_count = instr->InputCount() - 2;
// Ensure to emit the constant pool first if necessary.
__ CheckConstPool(true, true);
__ cmp(input, Operand(case_count));
__ BlockConstPoolFor(case_count + 2);
__ ldr(pc, MemOperand(pc, input, LSL, 2), lo);
__ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
__ b(GetLabel(i.InputRpo(1)));
for (size_t index = 0; index < case_count; ++index) {
__ dd(GetLabel(i.InputRpo(index + 2)));
__ b(GetLabel(i.InputRpo(index + 2)));
}
}
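For reference, the new sequence works because reading pc as a source operand on ARM yields the address of the current instruction plus 8. A sketch of the emitted layout (addresses are illustrative):

  // A + 0:  cmp   input, #case_count
  // A + 4:  addlo pc, pc, input, LSL #2   ; pc reads as A + 12 = first table entry
  // A + 8:  b     <default target>        ; reached only when input >= case_count
  // A + 12: b     <case 0>
  // A + 16: b     <case 1>
  //         ...

So the addlo jumps straight to table entry number input, and the BlockConstPoolFor call visible in the hunk above keeps the assembler from emitting a constant pool in the middle of the table.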
......
@@ -1885,172 +1885,6 @@ TEST(code_relative_offset) {
}
TEST(jump_tables1) {
// Test jump tables with forward jumps.
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
Assembler assm(isolate, nullptr, 0);
const int kNumCases = 512;
int values[kNumCases];
isolate->random_number_generator()->NextBytes(values, sizeof(values));
Label labels[kNumCases];
__ stm(db_w, sp, lr.bit());
Label done;
__ BlockConstPoolFor(kNumCases + 2);
{
PredictableCodeSizeScope predictable(
&assm, (kNumCases + 2) * Assembler::kInstrSize);
__ ldr(pc, MemOperand(pc, r0, LSL, 2));
__ nop();
for (int i = 0; i < kNumCases; ++i) {
__ dd(&labels[i]);
}
}
for (int i = 0; i < kNumCases; ++i) {
__ bind(&labels[i]);
__ mov(r0, Operand(values[i]));
__ b(&done);
}
__ bind(&done);
__ ldm(ia_w, sp, pc.bit());
CodeDesc desc;
assm.GetCode(&desc);
Handle<Code> code = isolate->factory()->NewCode(
desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
F1 f = FUNCTION_CAST<F1>(code->entry());
for (int i = 0; i < kNumCases; ++i) {
int res = reinterpret_cast<int>(CALL_GENERATED_CODE(f, i, 0, 0, 0, 0));
::printf("f(%d) = %d\n", i, res);
CHECK_EQ(values[i], res);
}
}
TEST(jump_tables2) {
// Test jump tables with backward jumps.
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
Assembler assm(isolate, nullptr, 0);
const int kNumCases = 512;
int values[kNumCases];
isolate->random_number_generator()->NextBytes(values, sizeof(values));
Label labels[kNumCases];
__ stm(db_w, sp, lr.bit());
Label done, dispatch;
__ b(&dispatch);
for (int i = 0; i < kNumCases; ++i) {
__ bind(&labels[i]);
__ mov(r0, Operand(values[i]));
__ b(&done);
}
__ bind(&dispatch);
__ BlockConstPoolFor(kNumCases + 2);
{
PredictableCodeSizeScope predictable(
&assm, (kNumCases + 2) * Assembler::kInstrSize);
__ ldr(pc, MemOperand(pc, r0, LSL, 2));
__ nop();
for (int i = 0; i < kNumCases; ++i) {
__ dd(&labels[i]);
}
}
__ bind(&done);
__ ldm(ia_w, sp, pc.bit());
CodeDesc desc;
assm.GetCode(&desc);
Handle<Code> code = isolate->factory()->NewCode(
desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
F1 f = FUNCTION_CAST<F1>(code->entry());
for (int i = 0; i < kNumCases; ++i) {
int res = reinterpret_cast<int>(CALL_GENERATED_CODE(f, i, 0, 0, 0, 0));
::printf("f(%d) = %d\n", i, res);
CHECK_EQ(values[i], res);
}
}
TEST(jump_tables3) {
// Test jump tables with backward jumps and embedded heap objects.
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
Assembler assm(isolate, nullptr, 0);
const int kNumCases = 256;
Handle<Object> values[kNumCases];
for (int i = 0; i < kNumCases; ++i) {
double value = isolate->random_number_generator()->NextDouble();
values[i] = isolate->factory()->NewHeapNumber(value, IMMUTABLE, TENURED);
}
Label labels[kNumCases];
__ stm(db_w, sp, lr.bit());
Label done, dispatch;
__ b(&dispatch);
for (int i = 0; i < kNumCases; ++i) {
__ bind(&labels[i]);
__ mov(r0, Operand(values[i]));
__ b(&done);
}
__ bind(&dispatch);
__ BlockConstPoolFor(kNumCases + 2);
{
PredictableCodeSizeScope predictable(
&assm, (kNumCases + 2) * Assembler::kInstrSize);
__ ldr(pc, MemOperand(pc, r0, LSL, 2));
__ nop();
for (int i = 0; i < kNumCases; ++i) {
__ dd(&labels[i]);
}
}
__ bind(&done);
__ ldm(ia_w, sp, pc.bit());
CodeDesc desc;
assm.GetCode(&desc);
Handle<Code> code = isolate->factory()->NewCode(
desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
F1 f = FUNCTION_CAST<F1>(code->entry());
for (int i = 0; i < kNumCases; ++i) {
Handle<Object> result(CALL_GENERATED_CODE(f, i, 0, 0, 0, 0), isolate);
#ifdef OBJECT_PRINT
::printf("f(%d) = ", i);
result->Print(std::cout);
::printf("\n");
#endif
CHECK(values[i].is_identical_to(result));
}
}
TEST(ARMv8_vrintX) {
// Test the vrintX floating point instructions.
CcTest::InitializeVM();
......
@@ -1484,9 +1484,9 @@ TEST(SerializeWithHarmonyScoping) {
TEST(SerializeInternalReference) {
#ifdef V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64
return;
#endif // V8_TARGET_ARCH_ARM64
#endif
// Disable experimental natives that are loaded after deserialization.
FLAG_turbo_deoptimization = false;
FLAG_context_specialization = false;
......