Commit b0801678 authored by Tobias Tebbi, committed by Commit Bot

[csa] block cloning to optimize branch on phi

This allows returning bool values from Torque macros and branching on
them without a performance penalty, by reconstructing good control flow.
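
As a sketch (hypothetical names, mirroring the test added in this
change), a bool-returning macro can now be branched on without
re-testing a materialized bool:

  macro IsPositive(x: Smi): bool {
    return x > 0;
  }

  macro Example(x: Smi): Smi {
    // Branching on the macro's result previously went through a phi of
    // the true/false return values; block cloning now reconnects each
    // return path directly to the matching arm of this if-statement.
    if (IsPositive(x)) {
      return 1;
    } else {
      return 0;
    }
  }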

Drive-by cleanup: Delete EnsureDeferredCodeSingleEntryPoint(), since
it's no longer needed. Constructing a graph and then re-inferring
deferred blocks based on branch hints achieves this effect
automatically.

Bug: v8:7793
Change-Id: Idb6802372b407549e4760f290933d5b8f1e9d952
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1681132
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62979}
parent edacd800
@@ -77,7 +77,7 @@ Node* RawMachineAssembler::OptimizedAllocate(
size);
}
Schedule* RawMachineAssembler::Export() {
Schedule* RawMachineAssembler::ExportForTest() {
// Compute the correct codegen order.
DCHECK(schedule_->rpo_order()->empty());
if (FLAG_trace_turbo_scheduler) {
@@ -106,6 +106,7 @@ Graph* RawMachineAssembler::ExportForOptimization() {
StdoutStream{} << *schedule_;
}
schedule_->EnsureCFGWellFormedness();
OptimizeControlFlow(schedule_, graph(), common());
Scheduler::ComputeSpecialRPO(zone(), schedule_);
if (FLAG_trace_turbo_scheduler) {
PrintF("--- SCHEDULE BEFORE GRAPH CREATION -------------------------\n");
@@ -117,6 +118,99 @@ Graph* RawMachineAssembler::ExportForOptimization() {
return graph();
}
void RawMachineAssembler::OptimizeControlFlow(Schedule* schedule, Graph* graph,
CommonOperatorBuilder* common) {
for (bool changed = true; changed;) {
changed = false;
for (size_t i = 0; i < schedule->all_blocks()->size(); ++i) {
BasicBlock* block = (*schedule->all_blocks())[i];
if (block == nullptr) continue;
// Short-circuit a goto if the succeeding block is not a control-flow
// merge. This is not really useful on its own, since graph construction
// has the same effect, but combining blocks improves the pattern matching
// on their structure below.
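// For example, a chain B1 -goto-> B2 -goto-> B3, where B2 and B3 each
// have a single predecessor, collapses into one block holding the nodes
// of all three blocks.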
if (block->control() == BasicBlock::kGoto) {
DCHECK_EQ(block->SuccessorCount(), 1);
BasicBlock* successor = block->SuccessorAt(0);
if (successor->PredecessorCount() == 1) {
DCHECK_EQ(successor->PredecessorAt(0), block);
for (Node* node : *successor) {
schedule->SetBlockForNode(nullptr, node);
schedule->AddNode(block, node);
}
block->set_control(successor->control());
Node* control_input = successor->control_input();
block->set_control_input(control_input);
if (control_input) {
schedule->SetBlockForNode(block, control_input);
}
if (successor->deferred()) block->set_deferred(true);
block->ClearSuccessors();
schedule->MoveSuccessors(successor, block);
schedule->ClearBlockById(successor->id());
changed = true;
--i;
continue;
}
}
// Block-cloning in the simple case where a block consists only of a phi
// node and a branch on that phi. This just duplicates the branch block
// for each predecessor, replacing the phi node with the corresponding phi
// input.
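// Schematically, with p1/p2 denoting the phi inputs:
//
//   B1: goto B3                    B1: branch p1 -> T1, F1
//   B2: goto B3              =>    B2: branch p2 -> T2, F2
//   B3: x = phi(p1, p2)            T1, T2: goto T
//       branch x -> T, F           F1, F2: goto F
//
// where T and F are the original true/false successor blocks.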
if (block->control() == BasicBlock::kBranch && block->NodeCount() == 1) {
Node* phi = block->NodeAt(0);
if (phi->opcode() != IrOpcode::kPhi) continue;
Node* branch = block->control_input();
DCHECK_EQ(branch->opcode(), IrOpcode::kBranch);
if (NodeProperties::GetValueInput(branch, 0) != phi) continue;
if (phi->UseCount() != 1) continue;
DCHECK_EQ(phi->op()->ValueInputCount(), block->PredecessorCount());
// Turn projection blocks into normal blocks.
DCHECK_EQ(block->SuccessorCount(), 2);
BasicBlock* true_block = block->SuccessorAt(0);
BasicBlock* false_block = block->SuccessorAt(1);
DCHECK_EQ(true_block->NodeAt(0)->opcode(), IrOpcode::kIfTrue);
DCHECK_EQ(false_block->NodeAt(0)->opcode(), IrOpcode::kIfFalse);
(*true_block->begin())->Kill();
true_block->RemoveNode(true_block->begin());
(*false_block->begin())->Kill();
false_block->RemoveNode(false_block->begin());
true_block->ClearPredecessors();
false_block->ClearPredecessors();
size_t arity = block->PredecessorCount();
for (size_t i = 0; i < arity; ++i) {
BasicBlock* predecessor = block->PredecessorAt(i);
predecessor->ClearSuccessors();
if (block->deferred()) predecessor->set_deferred(true);
Node* branch_clone = graph->CloneNode(branch);
int phi_input = static_cast<int>(i);
NodeProperties::ReplaceValueInput(
branch_clone, NodeProperties::GetValueInput(phi, phi_input), 0);
BasicBlock* new_true_block = schedule->NewBasicBlock();
BasicBlock* new_false_block = schedule->NewBasicBlock();
new_true_block->AddNode(
graph->NewNode(common->IfTrue(), branch_clone));
new_false_block->AddNode(
graph->NewNode(common->IfFalse(), branch_clone));
schedule->AddGoto(new_true_block, true_block);
schedule->AddGoto(new_false_block, false_block);
DCHECK_EQ(predecessor->control(), BasicBlock::kGoto);
predecessor->set_control(BasicBlock::kNone);
schedule->AddBranch(predecessor, branch_clone, new_true_block,
new_false_block);
}
branch->Kill();
schedule->ClearBlockById(block->id());
changed = true;
continue;
}
}
}
}
void RawMachineAssembler::MakeReschedulable() {
std::vector<Node*> block_final_control(schedule_->all_blocks_.size());
std::vector<Node*> block_final_effect(schedule_->all_blocks_.size());
......
@@ -65,9 +65,10 @@ class V8_EXPORT_PRIVATE RawMachineAssembler {
CallDescriptor* call_descriptor() const { return call_descriptor_; }
PoisoningMitigationLevel poisoning_level() const { return poisoning_level_; }
// Finalizes the schedule and exports it to be used for code generation. Note
// that this RawMachineAssembler becomes invalid after export.
Schedule* Export();
// Only used for tests: Finalizes the schedule and exports it to be used for
// code generation. Note that this RawMachineAssembler becomes invalid after
// export.
Schedule* ExportForTest();
// Finalizes the schedule and transforms it into a graph that is suitable
// for TurboFan optimization and re-scheduling. Note that this
// RawMachineAssembler becomes invalid after export.
@@ -1091,6 +1092,9 @@ class V8_EXPORT_PRIVATE RawMachineAssembler {
Schedule* schedule() { return schedule_; }
size_t parameter_count() const { return call_descriptor_->ParameterCount(); }
static void OptimizeControlFlow(Schedule* schedule, Graph* graph,
CommonOperatorBuilder* common);
Isolate* isolate_;
Graph* graph_;
Schedule* schedule_;
......
@@ -163,6 +163,11 @@ BasicBlock* Schedule::GetBlockById(BasicBlock::Id block_id) {
return all_blocks_[block_id.ToSize()];
}
void Schedule::ClearBlockById(BasicBlock::Id block_id) {
DCHECK(block_id.ToSize() < all_blocks_.size());
all_blocks_[block_id.ToSize()] = nullptr;
}
bool Schedule::SameBasicBlock(Node* a, Node* b) const {
BasicBlock* block = this->block(a);
return block != nullptr && block == this->block(b);
@@ -321,9 +326,6 @@ void Schedule::EnsureCFGWellFormedness() {
if (block != end_) {
EnsureSplitEdgeForm(block);
}
if (block->deferred()) {
EnsureDeferredCodeSingleEntryPoint(block);
}
}
}
@@ -356,6 +358,7 @@ void Schedule::EliminateRedundantPhiNodes() {
}
if (!inputs_equal) continue;
node->ReplaceUses(first_input);
node->Kill();
block->RemoveNode(block->begin() + node_pos);
--node_pos;
reached_fixed_point = false;
@@ -376,43 +379,6 @@ void Schedule::EnsureSplitEdgeForm(BasicBlock* block) {
#endif
}
void Schedule::EnsureDeferredCodeSingleEntryPoint(BasicBlock* block) {
// If a deferred block has multiple predecessors, they have to
// all be deferred. Otherwise, we can run into a situation where a range
// that spills only in deferred blocks inserts its spill in the block, but
// other ranges need moves inserted by ResolveControlFlow in the predecessors,
// which may clobber the register of this range.
// To ensure that, when a deferred block has multiple predecessors, and some
// are not deferred, we add a non-deferred block to collect all such edges.
DCHECK(block->deferred() && block->PredecessorCount() > 1);
bool all_deferred = true;
for (auto current_pred = block->predecessors().begin();
current_pred != block->predecessors().end(); ++current_pred) {
BasicBlock* pred = *current_pred;
if (!pred->deferred()) {
all_deferred = false;
break;
}
}
if (all_deferred) return;
BasicBlock* merger = NewBasicBlock();
merger->set_control(BasicBlock::kGoto);
merger->successors().push_back(block);
for (auto current_pred = block->predecessors().begin();
current_pred != block->predecessors().end(); ++current_pred) {
BasicBlock* pred = *current_pred;
merger->predecessors().push_back(pred);
pred->successors().clear();
pred->successors().push_back(merger);
}
merger->set_deferred(false);
block->predecessors().clear();
block->predecessors().push_back(merger);
MovePhis(block, merger);
}
void Schedule::MovePhis(BasicBlock* from, BasicBlock* to) {
for (size_t i = 0; i < from->NodeCount();) {
Node* node = from->NodeAt(i);
@@ -481,6 +447,7 @@ void Schedule::SetBlockForNode(BasicBlock* block, Node* node) {
std::ostream& operator<<(std::ostream& os, const Schedule& s) {
for (BasicBlock* block :
((s.RpoBlockCount() == 0) ? *s.all_blocks() : *s.rpo_order())) {
if (block == nullptr) continue;
if (block->rpo_number() == -1) {
os << "--- BLOCK id:" << block->id().ToInt();
} else {
......
@@ -200,6 +200,7 @@ class V8_EXPORT_PRIVATE Schedule final : public NON_EXPORTED_BASE(ZoneObject) {
bool IsScheduled(Node* node);
BasicBlock* GetBlockById(BasicBlock::Id block_id);
void ClearBlockById(BasicBlock::Id block_id);
size_t BasicBlockCount() const { return all_blocks_.size(); }
size_t RpoBlockCount() const { return rpo_order_.size(); }
@@ -280,8 +281,6 @@ class V8_EXPORT_PRIVATE Schedule final : public NON_EXPORTED_BASE(ZoneObject) {
void EliminateRedundantPhiNodes();
// Ensure split-edge form for a hand-assembled schedule.
void EnsureSplitEdgeForm(BasicBlock* block);
// Ensure entry into a deferred block happens from a single hot block.
void EnsureDeferredCodeSingleEntryPoint(BasicBlock* block);
// Move Phi operands to newly created merger blocks
void MovePhis(BasicBlock* from, BasicBlock* to);
// Copy deferred block markers down as far as possible
......
@@ -79,7 +79,7 @@ class RawMachineAssemblerTester : public HandleAndZoneScope,
protected:
Address Generate() override {
if (code_.is_null()) {
Schedule* schedule = this->Export();
Schedule* schedule = this->ExportForTest();
auto call_descriptor = this->call_descriptor();
Graph* graph = this->graph();
OptimizedCompilationInfo info(ArrayVector("testing"), main_zone(), kind_);
......
@@ -167,11 +167,11 @@ void TestReturnMultipleValues(MachineType type) {
OptimizedCompilationInfo info(ArrayVector("testing"), handles.main_zone(),
Code::WASM_FUNCTION);
Handle<Code> code =
Pipeline::GenerateCodeForTesting(
&info, handles.main_isolate(), desc, m.graph(),
AssemblerOptions::Default(handles.main_isolate()), m.Export())
.ToHandleChecked();
Handle<Code> code = Pipeline::GenerateCodeForTesting(
&info, handles.main_isolate(), desc, m.graph(),
AssemblerOptions::Default(handles.main_isolate()),
m.ExportForTest())
.ToHandleChecked();
#ifdef ENABLE_DISASSEMBLER
if (FLAG_print_code) {
StdoutStream os;
@@ -272,11 +272,11 @@ void ReturnLastValue(MachineType type) {
OptimizedCompilationInfo info(ArrayVector("testing"), handles.main_zone(),
Code::WASM_FUNCTION);
Handle<Code> code =
Pipeline::GenerateCodeForTesting(
&info, handles.main_isolate(), desc, m.graph(),
AssemblerOptions::Default(handles.main_isolate()), m.Export())
.ToHandleChecked();
Handle<Code> code = Pipeline::GenerateCodeForTesting(
&info, handles.main_isolate(), desc, m.graph(),
AssemblerOptions::Default(handles.main_isolate()),
m.ExportForTest())
.ToHandleChecked();
std::shared_ptr<wasm::NativeModule> module = AllocateNativeModule(
handles.main_isolate(), code->raw_instruction_size());
@@ -334,11 +334,11 @@ void ReturnSumOfReturns(MachineType type) {
OptimizedCompilationInfo info(ArrayVector("testing"), handles.main_zone(),
Code::WASM_FUNCTION);
Handle<Code> code =
Pipeline::GenerateCodeForTesting(
&info, handles.main_isolate(), desc, m.graph(),
AssemblerOptions::Default(handles.main_isolate()), m.Export())
.ToHandleChecked();
Handle<Code> code = Pipeline::GenerateCodeForTesting(
&info, handles.main_isolate(), desc, m.graph(),
AssemblerOptions::Default(handles.main_isolate()),
m.ExportForTest())
.ToHandleChecked();
std::shared_ptr<wasm::NativeModule> module = AllocateNativeModule(
handles.main_isolate(), code->raw_instruction_size());
......
@@ -439,7 +439,7 @@ class Computer {
Graph graph(&zone);
RawMachineAssembler raw(isolate, &graph, desc);
build(desc, raw);
inner = CompileGraph("Compute", desc, &graph, raw.Export());
inner = CompileGraph("Compute", desc, &graph, raw.ExportForTest());
}
CSignatureOf<int32_t> csig;
@@ -466,8 +466,8 @@ class Computer {
Node* store = io.StoreOutput(raw, call);
USE(store);
raw.Return(raw.Int32Constant(seed));
wrapper =
CompileGraph("Compute-wrapper-const", cdesc, &graph, raw.Export());
wrapper = CompileGraph("Compute-wrapper-const", cdesc, &graph,
raw.ExportForTest());
}
CodeRunner<int32_t> runnable(isolate, wrapper, &csig);
@@ -501,7 +501,8 @@ class Computer {
Node* store = io.StoreOutput(raw, call);
USE(store);
raw.Return(raw.Int32Constant(seed));
wrapper = CompileGraph("Compute-wrapper", cdesc, &graph, raw.Export());
wrapper =
CompileGraph("Compute-wrapper", cdesc, &graph, raw.ExportForTest());
}
CodeRunner<int32_t> runnable(isolate, wrapper, &csig);
@@ -576,7 +577,7 @@ static void CopyTwentyInt32(CallDescriptor* desc) {
kNoWriteBarrier);
}
raw.Return(raw.Int32Constant(42));
inner = CompileGraph("CopyTwentyInt32", desc, &graph, raw.Export());
inner = CompileGraph("CopyTwentyInt32", desc, &graph, raw.ExportForTest());
}
CSignatureOf<int32_t> csig;
@@ -599,8 +600,8 @@ static void CopyTwentyInt32(CallDescriptor* desc) {
Node* call = raw.CallN(desc, input_count, inputs);
raw.Return(call);
wrapper =
CompileGraph("CopyTwentyInt32-wrapper", cdesc, &graph, raw.Export());
wrapper = CompileGraph("CopyTwentyInt32-wrapper", cdesc, &graph,
raw.ExportForTest());
}
CodeRunner<int32_t> runnable(isolate, wrapper, &csig);
@@ -962,7 +963,8 @@ static void Build_Select_With_Call(
Graph graph(&zone);
RawMachineAssembler raw(isolate, &graph, desc);
raw.Return(raw.Parameter(which));
inner = CompileGraph("Select-indirection", desc, &graph, raw.Export());
inner =
CompileGraph("Select-indirection", desc, &graph, raw.ExportForTest());
CHECK(!inner.is_null());
CHECK(inner->IsCode());
}
@@ -1058,7 +1060,7 @@ void MixedParamTest(int start) {
Graph graph(&zone);
RawMachineAssembler raw(isolate, &graph, desc);
raw.Return(raw.Parameter(which));
select = CompileGraph("Compute", desc, &graph, raw.Export());
select = CompileGraph("Compute", desc, &graph, raw.ExportForTest());
}
{
@@ -1117,7 +1119,7 @@ void MixedParamTest(int start) {
expected_ret = static_cast<int32_t>(constant);
raw.Return(raw.Int32Constant(expected_ret));
wrapper = CompileGraph("Select-mixed-wrapper-const", cdesc, &graph,
raw.Export());
raw.ExportForTest());
}
CodeRunner<int32_t> runnable(isolate, wrapper, &csig);
@@ -1176,7 +1178,7 @@ void TestStackSlot(MachineType slot_type, T expected) {
g.Store(slot_type.representation(), g.Parameter(11), g.Parameter(10),
WriteBarrierKind::kNoWriteBarrier);
g.Return(g.Parameter(9));
inner = CompileGraph("Compute", desc, &graph, g.Export());
inner = CompileGraph("Compute", desc, &graph, g.ExportForTest());
// Create function f with a stack slot which calls the inner function g.
BufferedRawMachineAssemblerTester<T> f(slot_type);
......
@@ -585,6 +585,23 @@ TEST(TestGenericStruct2) {
ft.Call();
}
TEST(TestBranchOnBoolOptimization) {
CcTest::InitializeVM();
Isolate* isolate(CcTest::i_isolate());
i::HandleScope scope(isolate);
Handle<Context> context =
Utils::OpenHandle(*v8::Isolate::GetCurrent()->GetCurrentContext());
CodeAssemblerTester asm_tester(isolate, 1);
TestTorqueAssembler m(asm_tester.state());
{
m.TestBranchOnBoolOptimization(
m.UncheckedCast<Context>(m.HeapConstant(context)),
m.UncheckedCast<Smi>(m.Parameter(0)));
m.Return(m.UndefinedConstant());
}
asm_tester.GenerateCode();
}
} // namespace compiler
} // namespace internal
} // namespace v8
@@ -238,10 +238,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
callee.Return(static_cast<int>(desc->ReturnCount()), returns.get());
OptimizedCompilationInfo info(ArrayVector("testing"), &zone, Code::STUB);
Handle<Code> code = Pipeline::GenerateCodeForTesting(
&info, i_isolate, desc, callee.graph(),
AssemblerOptions::Default(i_isolate), callee.Export())
.ToHandleChecked();
Handle<Code> code =
Pipeline::GenerateCodeForTesting(&info, i_isolate, desc, callee.graph(),
AssemblerOptions::Default(i_isolate),
callee.ExportForTest())
.ToHandleChecked();
std::shared_ptr<wasm::NativeModule> module =
AllocateNativeModule(i_isolate, code->raw_instruction_size());
@@ -286,7 +287,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
Handle<Code> wrapper_code =
Pipeline::GenerateCodeForTesting(
&wrapper_info, i_isolate, wrapper_desc, caller.graph(),
AssemblerOptions::Default(i_isolate), caller.Export())
AssemblerOptions::Default(i_isolate), caller.ExportForTest())
.ToHandleChecked();
auto fn = GeneratedCode<int32_t>::FromCode(*wrapper_code);
......
@@ -1008,4 +1008,26 @@ namespace test {
return smiAndIntptr;
}
macro BranchAndWriteResult(x: Smi, box: SmiBox): bool {
if (x > 5 || x < 0) {
box.value = 1;
return true;
} else {
box.value = 2;
return false;
}
}
@export
macro TestBranchOnBoolOptimization(implicit context: Context)(input: Smi) {
const box = NewSmiBox(1);
// If the two branches get combined into one, we should be able to determine
// the value of {box} statically.
if (BranchAndWriteResult(input, box)) {
StaticAssert(box.value == 1);
} else {
StaticAssert(box.value == 2);
}
}
}
@@ -25,7 +25,7 @@ InstructionSelectorTest::Stream InstructionSelectorTest::StreamBuilder::Build(
InstructionSelector::Features features,
InstructionSelectorTest::StreamBuilderMode mode,
InstructionSelector::SourcePositionMode source_position_mode) {
Schedule* schedule = Export();
Schedule* schedule = ExportForTest();
if (FLAG_trace_turbo) {
StdoutStream{} << "=== Schedule before instruction selection ==="
<< std::endl
......