Commit c01bfa9a authored by Ben L. Titzer, committed by Commit Bot

[turbofan] Implement loop rotation

This CL implements an assembly order optimization that moves blocks
that end a loop with an unconditional backedge to the beginning of
the loop, saving a branch.

R=jarin@chromium.org,mstarzinger@chromium.org
BUG=v8:8423

Change-Id: I8a5d25f5472d71227af0f623277ea8d0a8d69867
Reviewed-on: https://chromium-review.googlesource.com/c/1335944
Commit-Queue: Ben Titzer <titzer@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57572}
parent 7a682a38
......@@ -195,78 +195,71 @@ void CodeGenerator::AssembleCode() {
block_starts_.assign(code()->instruction_blocks().size(), -1);
instr_starts_.assign(code()->instructions().size(), -1);
}
// Assemble all non-deferred blocks, followed by deferred ones.
for (int deferred = 0; deferred < 2; ++deferred) {
for (const InstructionBlock* block : code()->instruction_blocks()) {
if (block->IsDeferred() == (deferred == 0)) {
continue;
}
// Align loop headers on 16-byte boundaries.
if (block->IsLoopHeader() && !tasm()->jump_optimization_info()) {
tasm()->Align(16);
}
if (info->trace_turbo_json_enabled()) {
block_starts_[block->rpo_number().ToInt()] = tasm()->pc_offset();
// Assemble instructions in assembly order.
for (const InstructionBlock* block : code()->ao_blocks()) {
// Align loop headers on 16-byte boundaries.
if (block->ShouldAlign() && !tasm()->jump_optimization_info()) {
tasm()->Align(16);
}
if (info->trace_turbo_json_enabled()) {
block_starts_[block->rpo_number().ToInt()] = tasm()->pc_offset();
}
// Bind a label for a block.
current_block_ = block->rpo_number();
unwinding_info_writer_.BeginInstructionBlock(tasm()->pc_offset(), block);
if (FLAG_code_comments) {
Vector<char> buffer = Vector<char>::New(200);
char* buffer_start = buffer.start();
LSAN_IGNORE_OBJECT(buffer_start);
int next = SNPrintF(
buffer, "-- B%d start%s%s%s%s", block->rpo_number().ToInt(),
block->IsDeferred() ? " (deferred)" : "",
block->needs_frame() ? "" : " (no frame)",
block->must_construct_frame() ? " (construct frame)" : "",
block->must_deconstruct_frame() ? " (deconstruct frame)" : "");
buffer = buffer.SubVector(next, buffer.length());
if (block->IsLoopHeader()) {
next = SNPrintF(buffer, " (loop up to %d)", block->loop_end().ToInt());
buffer = buffer.SubVector(next, buffer.length());
}
// Bind a label for a block.
current_block_ = block->rpo_number();
unwinding_info_writer_.BeginInstructionBlock(tasm()->pc_offset(), block);
if (FLAG_code_comments) {
Vector<char> buffer = Vector<char>::New(200);
char* buffer_start = buffer.start();
LSAN_IGNORE_OBJECT(buffer_start);
int next = SNPrintF(
buffer, "-- B%d start%s%s%s%s", block->rpo_number().ToInt(),
block->IsDeferred() ? " (deferred)" : "",
block->needs_frame() ? "" : " (no frame)",
block->must_construct_frame() ? " (construct frame)" : "",
block->must_deconstruct_frame() ? " (deconstruct frame)" : "");
if (block->loop_header().IsValid()) {
next = SNPrintF(buffer, " (in loop %d)", block->loop_header().ToInt());
buffer = buffer.SubVector(next, buffer.length());
if (block->IsLoopHeader()) {
next =
SNPrintF(buffer, " (loop up to %d)", block->loop_end().ToInt());
buffer = buffer.SubVector(next, buffer.length());
}
if (block->loop_header().IsValid()) {
next =
SNPrintF(buffer, " (in loop %d)", block->loop_header().ToInt());
buffer = buffer.SubVector(next, buffer.length());
}
SNPrintF(buffer, " --");
tasm()->RecordComment(buffer_start);
}
SNPrintF(buffer, " --");
tasm()->RecordComment(buffer_start);
}
frame_access_state()->MarkHasFrame(block->needs_frame());
frame_access_state()->MarkHasFrame(block->needs_frame());
tasm()->bind(GetLabel(current_block_));
tasm()->bind(GetLabel(current_block_));
TryInsertBranchPoisoning(block);
TryInsertBranchPoisoning(block);
if (block->must_construct_frame()) {
AssembleConstructFrame();
// We need to setup the root register after we assemble the prologue, to
// avoid clobbering callee saved registers in case of C linkage and
// using the roots.
// TODO(mtrofin): investigate how we can avoid doing this repeatedly.
if (linkage()->GetIncomingDescriptor()->InitializeRootRegister()) {
tasm()->InitializeRootRegister();
}
if (block->must_construct_frame()) {
AssembleConstructFrame();
// We need to setup the root register after we assemble the prologue, to
// avoid clobbering callee saved registers in case of C linkage and
// using the roots.
// TODO(mtrofin): investigate how we can avoid doing this repeatedly.
if (linkage()->GetIncomingDescriptor()->InitializeRootRegister()) {
tasm()->InitializeRootRegister();
}
}
if (FLAG_enable_embedded_constant_pool && !block->needs_frame()) {
ConstantPoolUnavailableScope constant_pool_unavailable(tasm());
result_ = AssembleBlock(block);
} else {
result_ = AssembleBlock(block);
}
if (result_ != kSuccess) return;
unwinding_info_writer_.EndInstructionBlock(block);
if (FLAG_enable_embedded_constant_pool && !block->needs_frame()) {
ConstantPoolUnavailableScope constant_pool_unavailable(tasm());
result_ = AssembleBlock(block);
} else {
result_ = AssembleBlock(block);
}
if (result_ != kSuccess) return;
unwinding_info_writer_.EndInstructionBlock(block);
}
}
// Assemble all out-of-line code.
if (ools_) {
......
......@@ -636,17 +636,12 @@ InstructionBlock::InstructionBlock(Zone* zone, RpoNumber rpo_number,
: successors_(zone),
predecessors_(zone),
phis_(zone),
ao_number_(rpo_number),
ao_number_(RpoNumber::Invalid()),
rpo_number_(rpo_number),
loop_header_(loop_header),
loop_end_(loop_end),
code_start_(-1),
code_end_(-1),
deferred_(deferred),
handler_(handler),
needs_frame_(false),
must_construct_frame_(false),
must_deconstruct_frame_(false) {}
handler_(handler) {}
size_t InstructionBlock::PredecessorIndexOf(RpoNumber rpo_number) const {
size_t j = 0;
......@@ -748,7 +743,6 @@ InstructionBlocks* InstructionSequence::InstructionBlocksFor(
DCHECK(GetRpo(*it).ToSize() == rpo_number);
(*blocks)[rpo_number] = InstructionBlockFor(zone, *it);
}
ComputeAssemblyOrder(blocks);
return blocks;
}
......@@ -807,18 +801,59 @@ void InstructionSequence::ValidateSSA() const {
}
}
void InstructionSequence::ComputeAssemblyOrder(InstructionBlocks* blocks) {
void InstructionSequence::ComputeAssemblyOrder() {
int ao = 0;
for (InstructionBlock* const block : *blocks) {
if (!block->IsDeferred()) {
block->set_ao_number(RpoNumber::FromInt(ao++));
RpoNumber invalid = RpoNumber::Invalid();
ao_blocks_ = zone()->NewArray<InstructionBlocks>(1);
new (ao_blocks_) InstructionBlocks(zone());
ao_blocks_->reserve(instruction_blocks_->size());
// Place non-deferred blocks.
for (InstructionBlock* const block : *instruction_blocks_) {
DCHECK_NOT_NULL(block);
if (block->IsDeferred()) continue; // skip deferred blocks.
if (block->ao_number() != invalid) continue; // loop rotated.
if (block->IsLoopHeader()) {
bool header_align = true;
if (FLAG_turbo_loop_rotation) {
// Perform loop rotation for non-deferred loops.
InstructionBlock* loop_end =
instruction_blocks_->at(block->loop_end().ToSize() - 1);
if (loop_end->SuccessorCount() == 1 && /* ends with goto */
loop_end != block /* not a degenerate infinite loop */) {
// If the last block has an unconditional jump back to the header,
// then move it to be in front of the header in the assembly order.
DCHECK_EQ(block->rpo_number(), loop_end->successors()[0]);
loop_end->set_ao_number(RpoNumber::FromInt(ao++));
ao_blocks_->push_back(loop_end);
// This block will be the new machine-level loop header, so align
// this block instead of the loop header block.
loop_end->set_alignment(true);
header_align = false;
}
}
block->set_alignment(header_align);
}
block->set_ao_number(RpoNumber::FromInt(ao++));
ao_blocks_->push_back(block);
}
for (InstructionBlock* const block : *blocks) {
if (block->IsDeferred()) {
// Add all leftover (deferred) blocks.
for (InstructionBlock* const block : *instruction_blocks_) {
if (block->ao_number() == invalid) {
block->set_ao_number(RpoNumber::FromInt(ao++));
ao_blocks_->push_back(block);
}
}
DCHECK_EQ(instruction_blocks_->size(), ao);
}
void InstructionSequence::RecomputeAssemblyOrderForTesting() {
RpoNumber invalid = RpoNumber::Invalid();
for (InstructionBlock* block : *instruction_blocks_) {
block->set_ao_number(invalid);
}
ComputeAssemblyOrder();
}
InstructionSequence::InstructionSequence(Isolate* isolate,
......@@ -827,6 +862,7 @@ InstructionSequence::InstructionSequence(Isolate* isolate,
: isolate_(isolate),
zone_(instruction_zone),
instruction_blocks_(instruction_blocks),
ao_blocks_(nullptr),
source_positions_(zone()),
constants_(ConstantMap::key_compare(),
ConstantMap::allocator_type(zone())),
......@@ -837,7 +873,9 @@ InstructionSequence::InstructionSequence(Isolate* isolate,
representations_(zone()),
representation_mask_(0),
deoptimization_entries_(zone()),
current_block_(nullptr) {}
current_block_(nullptr) {
ComputeAssemblyOrder();
}
int InstructionSequence::NextVirtualRegister() {
int virtual_register = next_virtual_register_++;
......
......@@ -1386,6 +1386,7 @@ class V8_EXPORT_PRIVATE InstructionBlock final
return loop_end_;
}
inline bool IsLoopHeader() const { return loop_end_.IsValid(); }
inline bool ShouldAlign() const { return alignment_; }
typedef ZoneVector<RpoNumber> Predecessors;
Predecessors& predecessors() { return predecessors_; }
......@@ -1405,6 +1406,8 @@ class V8_EXPORT_PRIVATE InstructionBlock final
void set_ao_number(RpoNumber ao_number) { ao_number_ = ao_number; }
void set_alignment(bool val) { alignment_ = val; }
bool needs_frame() const { return needs_frame_; }
void mark_needs_frame() { needs_frame_ = true; }
......@@ -1423,12 +1426,13 @@ class V8_EXPORT_PRIVATE InstructionBlock final
const RpoNumber loop_header_;
const RpoNumber loop_end_;
int32_t code_start_; // start index of arch-specific code.
int32_t code_end_; // end index of arch-specific code.
const bool deferred_; // Block contains deferred code.
int32_t code_end_ = -1; // end index of arch-specific code.
const bool deferred_ = false;  // Block contains deferred code; set by the constructor.
const bool handler_; // Block is a handler entry point.
bool needs_frame_;
bool must_construct_frame_;
bool must_deconstruct_frame_;
bool alignment_ = false; // insert alignment before this block
bool needs_frame_ = false;
bool must_construct_frame_ = false;
bool must_deconstruct_frame_ = false;
};
class InstructionSequence;
......@@ -1461,9 +1465,6 @@ class V8_EXPORT_PRIVATE InstructionSequence final
public:
static InstructionBlocks* InstructionBlocksFor(Zone* zone,
const Schedule* schedule);
// Puts the deferred blocks last.
static void ComputeAssemblyOrder(InstructionBlocks* blocks);
InstructionSequence(Isolate* isolate, Zone* zone,
InstructionBlocks* instruction_blocks);
......@@ -1474,6 +1475,8 @@ class V8_EXPORT_PRIVATE InstructionSequence final
return *instruction_blocks_;
}
const InstructionBlocks& ao_blocks() const { return *ao_blocks_; }
int InstructionBlockCount() const {
return static_cast<int>(instruction_blocks_->size());
}
......@@ -1618,6 +1621,8 @@ class V8_EXPORT_PRIVATE InstructionSequence final
const RegisterConfiguration* regConfig);
static void ClearRegisterConfigurationForTesting();
void RecomputeAssemblyOrderForTesting();
private:
friend V8_EXPORT_PRIVATE std::ostream& operator<<(
std::ostream& os, const PrintableInstructionSequence& code);
......@@ -1627,9 +1632,13 @@ class V8_EXPORT_PRIVATE InstructionSequence final
static const RegisterConfiguration* RegisterConfigurationForTesting();
static const RegisterConfiguration* registerConfigurationForTesting_;
// Puts the deferred blocks last and may rotate loops.
void ComputeAssemblyOrder();
Isolate* isolate_;
Zone* const zone_;
InstructionBlocks* const instruction_blocks_;
InstructionBlocks* ao_blocks_;
SourcePositionMap source_positions_;
ConstantMap constants_;
Immediates immediates_;
......
......@@ -197,19 +197,12 @@ void JumpThreading::ApplyForwarding(Zone* local_zone,
}
}
// Recompute assembly order numbers.
// Renumber the blocks so that IsNextInAssemblyOrder() will return true,
// even if there are skipped blocks in-between.
int ao = 0;
for (auto const block : code->instruction_blocks()) {
if (!block->IsDeferred()) {
block->set_ao_number(RpoNumber::FromInt(ao));
if (!skip[block->rpo_number().ToInt()]) ao++;
}
}
for (auto const block : code->instruction_blocks()) {
if (block->IsDeferred()) {
block->set_ao_number(RpoNumber::FromInt(ao));
if (!skip[block->rpo_number().ToInt()]) ao++;
}
for (auto const block : code->ao_blocks()) {
block->set_ao_number(RpoNumber::FromInt(ao));
if (!skip[block->rpo_number().ToInt()]) ao++;
}
}
......
......@@ -495,6 +495,7 @@ DEFINE_BOOL(turbo_move_optimization, true, "optimize gap moves in TurboFan")
DEFINE_BOOL(turbo_jt, true, "enable jump threading in TurboFan")
DEFINE_BOOL(turbo_loop_peeling, true, "Turbofan loop peeling")
DEFINE_BOOL(turbo_loop_variable, true, "Turbofan loop variable optimization")
DEFINE_BOOL(turbo_loop_rotation, false, "Turbofan loop rotation")
DEFINE_BOOL(turbo_cf_optimization, true, "optimize control flow in TurboFan")
DEFINE_BOOL(turbo_escape, true, "enable escape analysis")
DEFINE_BOOL(turbo_allocation_folding, true, "Turbofan allocation folding")
......
......@@ -381,19 +381,12 @@ class TestEnvironment : public HandleAndZoneScope {
static constexpr int kDoubleConstantCount = 4;
TestEnvironment()
: blocks_(1, main_zone()),
: blocks_(1, NewBlock(main_zone(), RpoNumber::FromInt(0)), main_zone()),
code_(main_isolate(), main_zone(), &blocks_),
rng_(CcTest::random_number_generator()),
supported_reps_({MachineRepresentation::kTagged,
MachineRepresentation::kFloat32,
MachineRepresentation::kFloat64}) {
// Create and initialize a single empty block in blocks_.
InstructionBlock* block = new (main_zone()) InstructionBlock(
main_zone(), RpoNumber::FromInt(0), RpoNumber::Invalid(),
RpoNumber::Invalid(), false, false);
block->set_ao_number(RpoNumber::FromInt(0));
blocks_[0] = block;
stack_slot_count_ =
kTaggedSlotCount + kFloat32SlotCount + kFloat64SlotCount;
if (TestSimd128Moves()) {
......@@ -925,6 +918,11 @@ class TestEnvironment : public HandleAndZoneScope {
return allocated_constants_[rep][index];
}
// Allocates an InstructionBlock in |zone| with RPO number |rpo|. Both
// remaining RpoNumber arguments are invalid and both boolean arguments are
// false.
static InstructionBlock* NewBlock(Zone* zone, RpoNumber rpo) {
  const RpoNumber none = RpoNumber::Invalid();
  InstructionBlock* const block =
      new (zone) InstructionBlock(zone, rpo, none, none, false, false);
  return block;
}
v8::base::RandomNumberGenerator* rng() const { return rng_; }
InstructionSequence* code() { return &code_; }
CallDescriptor* test_descriptor() { return test_descriptor_; }
......
......@@ -14,13 +14,11 @@ namespace compiler {
// Create InstructionBlocks with a single block.
InstructionBlocks* CreateSingleBlock(Zone* zone) {
InstructionBlocks* blocks = zone->NewArray<InstructionBlocks>(1);
new (blocks) InstructionBlocks(1, nullptr, zone);
InstructionBlock* block = new (zone)
InstructionBlock(zone, RpoNumber::FromInt(0), RpoNumber::Invalid(),
RpoNumber::Invalid(), false, false);
block->set_ao_number(RpoNumber::FromInt(0));
(*blocks)[0] = block;
InstructionBlocks* blocks = zone->NewArray<InstructionBlocks>(1);
new (blocks) InstructionBlocks(1, block, zone);
return blocks;
}
......
......@@ -613,6 +613,7 @@ TEST(FwPermuted_diamond) { RunAllPermutations<4>(RunPermutedDiamond); }
void ApplyForwarding(TestCode& code, int size, int* forward) {
code.sequence_.RecomputeAssemblyOrderForTesting();
ZoneVector<RpoNumber> vector(code.main_zone());
for (int i = 0; i < size; i++) {
vector.push_back(RpoNumber::FromInt(forward[i]));
......
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --turbo-loop-rotation --noliftoff --nowasm-tier-up
load("test/mjsunit/wasm/wasm-constants.js");
load("test/mjsunit/wasm/wasm-module-builder.js");
// Builds a module exporting "main" (signature kSig_v_i) whose body is one
// loop: local 0 minus 1, tee'd back into local 0, followed by kExprBrIf with
// depth 0. Instantiates it and calls main with 1, 10 and 100.
(function TestTrivialLoop1() {
  print(arguments.callee.name);
  const builder = new WasmModuleBuilder();
  const body = [
    kExprLoop, kWasmStmt,
      kExprGetLocal, 0,
      kExprI32Const, 1,
      kExprI32Sub,
      kExprTeeLocal, 0,
      kExprBrIf, 0,
    kExprEnd,
  ];
  builder.addFunction("main", kSig_v_i).addBody(body).exportFunc();
  const instance =
      new WebAssembly.Instance(new WebAssembly.Module(builder.toBuffer()));
  for (const count of [1, 10, 100]) {
    instance.exports.main(count);
  }
})();
// Same shape as a single decrementing loop, but the loop body ends with
// kExprBrIf at depth 1 followed by an unconditional kExprBr at depth 0.
// Instantiates the module and calls main with 1, 10 and 100.
(function TestTrivialLoop2() {
  print(arguments.callee.name);
  const builder = new WasmModuleBuilder();
  const body = [
    kExprLoop, kWasmStmt,
      kExprGetLocal, 0,
      kExprI32Const, 1,
      kExprI32Sub,
      kExprTeeLocal, 0,
      kExprBrIf, 1,
      kExprBr, 0,
    kExprEnd,
  ];
  builder.addFunction("main", kSig_v_i).addBody(body).exportFunc();
  const instance =
      new WebAssembly.Instance(new WebAssembly.Module(builder.toBuffer()));
  for (const count of [1, 10, 100]) {
    instance.exports.main(count);
  }
})();
// Variant with a memory: after kExprBrIf at depth 1, the loop body performs
// an i32 store of constant 0 at constant address 0 before the unconditional
// kExprBr at depth 0. Instantiates the module and calls main with 1, 10 and
// 100.
(function TestNonRotatedLoopWithStore() {
  print(arguments.callee.name);
  const builder = new WasmModuleBuilder();
  builder.addMemory(1, undefined, false);
  const body = [
    kExprLoop, kWasmStmt,
      kExprGetLocal, 0,
      kExprI32Const, 1,
      kExprI32Sub,
      kExprTeeLocal, 0,
      kExprBrIf, 1,
      kExprI32Const, 0,
      kExprI32Const, 0,
      kExprI32StoreMem, 0, 0,
      kExprBr, 0,
    kExprEnd,
  ];
  builder.addFunction("main", kSig_v_i).addBody(body).exportFunc();
  const instance =
      new WebAssembly.Instance(new WebAssembly.Module(builder.toBuffer()));
  for (const count of [1, 10, 100]) {
    instance.exports.main(count);
  }
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment