Commit a41c0c67 authored by Manos Koukoutos, committed by V8 LUCI CQ

[turbofan] Separate BranchElimination and CsaLoadElimination

BranchElimination and CsaLoadElimination interacted badly and created
quadratic behavior when run together. This happened when
CsaLoadElimination kept updating arguments of a Merge, and
BranchElimination kept going through all of them to find the common
prefix of all path conditions. Therefore, we separate BranchElimination
and CsaLoadElimination in the csa and wasm optimization pipelines.

Additional changes:
- Split WasmOptimizationPhase from CsaOptimizationPhase.
- Remove now-redundant argument from CsaOptimizationPhase::Run.
- Fine-grain how statistics are measured in the wasm pipeline.

Change-Id: Id166f4f7d1ea69a1a7b7ca108af4ffedbcda8abb
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2912779
Commit-Queue: Manos Koukoutos <manoskouk@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74779}
parent b0eb59fb
...@@ -956,6 +956,13 @@ void PrintCode(Isolate* isolate, Handle<Code> code, ...@@ -956,6 +956,13 @@ void PrintCode(Isolate* isolate, Handle<Code> code,
void TraceScheduleAndVerify(OptimizedCompilationInfo* info, PipelineData* data, void TraceScheduleAndVerify(OptimizedCompilationInfo* info, PipelineData* data,
Schedule* schedule, const char* phase_name) { Schedule* schedule, const char* phase_name) {
#ifdef V8_RUNTIME_CALL_STATS
PipelineRunScope scope(data, "V8.TraceScheduleAndVerify",
RuntimeCallCounterId::kOptimizeTraceScheduleAndVerify,
RuntimeCallStats::kThreadSpecific);
#else
PipelineRunScope scope(data, "V8.TraceScheduleAndVerify");
#endif
if (info->trace_turbo_json()) { if (info->trace_turbo_json()) {
UnparkedScopeIfNeeded scope(data->broker()); UnparkedScopeIfNeeded scope(data->broker());
AllowHandleDereference allow_deref; AllowHandleDereference allow_deref;
...@@ -2041,33 +2048,116 @@ struct ScheduledEffectControlLinearizationPhase { ...@@ -2041,33 +2048,116 @@ struct ScheduledEffectControlLinearizationPhase {
} }
}; };
struct WasmOptimizationPhase {
DECL_PIPELINE_PHASE_CONSTANTS(WasmOptimization)
void Run(PipelineData* data, Zone* temp_zone, bool allow_signalling_nan) {
// Run optimizations in two rounds: First one around load elimination and
// then one around branch elimination. This is because those two
// optimizations sometimes display quadratic complexity when run together.
// We only need load elimination for managed objects.
if (FLAG_experimental_wasm_gc) {
GraphReducer graph_reducer(temp_zone, data->graph(),
&data->info()->tick_counter(), data->broker(),
data->jsgraph()->Dead(),
data->observe_node_manager());
MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph(),
allow_signalling_nan);
DeadCodeElimination dead_code_elimination(
&graph_reducer, data->graph(), data->common(), temp_zone,
data->info()->concurrent_inlining());
CommonOperatorReducer common_reducer(&graph_reducer, data->graph(),
data->broker(), data->common(),
data->machine(), temp_zone);
ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone());
CsaLoadElimination load_elimination(&graph_reducer, data->jsgraph(),
temp_zone);
AddReducer(data, &graph_reducer, &machine_reducer);
AddReducer(data, &graph_reducer, &dead_code_elimination);
AddReducer(data, &graph_reducer, &common_reducer);
AddReducer(data, &graph_reducer, &value_numbering);
AddReducer(data, &graph_reducer, &load_elimination);
graph_reducer.ReduceGraph();
}
{
GraphReducer graph_reducer(temp_zone, data->graph(),
&data->info()->tick_counter(), data->broker(),
data->jsgraph()->Dead(),
data->observe_node_manager());
MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph(),
allow_signalling_nan);
DeadCodeElimination dead_code_elimination(
&graph_reducer, data->graph(), data->common(), temp_zone,
data->info()->concurrent_inlining());
CommonOperatorReducer common_reducer(&graph_reducer, data->graph(),
data->broker(), data->common(),
data->machine(), temp_zone);
ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone());
BranchElimination branch_condition_elimination(
&graph_reducer, data->jsgraph(), temp_zone);
AddReducer(data, &graph_reducer, &machine_reducer);
AddReducer(data, &graph_reducer, &dead_code_elimination);
AddReducer(data, &graph_reducer, &common_reducer);
AddReducer(data, &graph_reducer, &value_numbering);
AddReducer(data, &graph_reducer, &branch_condition_elimination);
graph_reducer.ReduceGraph();
}
}
};
struct CsaEarlyOptimizationPhase { struct CsaEarlyOptimizationPhase {
DECL_PIPELINE_PHASE_CONSTANTS(CSAEarlyOptimization) DECL_PIPELINE_PHASE_CONSTANTS(CSAEarlyOptimization)
void Run(PipelineData* data, Zone* temp_zone, bool allow_signalling_nan) { void Run(PipelineData* data, Zone* temp_zone) {
GraphReducer graph_reducer( // Run optimizations in two rounds: First one around load elimination and
temp_zone, data->graph(), &data->info()->tick_counter(), data->broker(), // then one around branch elimination. This is because those two
data->jsgraph()->Dead(), data->observe_node_manager()); // optimizations sometimes display quadratic complexity when run together.
MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph(), {
allow_signalling_nan); GraphReducer graph_reducer(temp_zone, data->graph(),
BranchElimination branch_condition_elimination(&graph_reducer, &data->info()->tick_counter(), data->broker(),
data->jsgraph(), temp_zone); data->jsgraph()->Dead(),
DeadCodeElimination dead_code_elimination( data->observe_node_manager());
&graph_reducer, data->graph(), data->common(), temp_zone, MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph(),
data->info()->concurrent_inlining()); true);
CommonOperatorReducer common_reducer(&graph_reducer, data->graph(), DeadCodeElimination dead_code_elimination(
data->broker(), data->common(), &graph_reducer, data->graph(), data->common(), temp_zone,
data->machine(), temp_zone); data->info()->concurrent_inlining());
ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone()); CommonOperatorReducer common_reducer(&graph_reducer, data->graph(),
CsaLoadElimination load_elimination(&graph_reducer, data->jsgraph(), data->broker(), data->common(),
temp_zone); data->machine(), temp_zone);
AddReducer(data, &graph_reducer, &machine_reducer); ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone());
AddReducer(data, &graph_reducer, &branch_condition_elimination); CsaLoadElimination load_elimination(&graph_reducer, data->jsgraph(),
AddReducer(data, &graph_reducer, &dead_code_elimination); temp_zone);
AddReducer(data, &graph_reducer, &common_reducer); AddReducer(data, &graph_reducer, &machine_reducer);
AddReducer(data, &graph_reducer, &value_numbering); AddReducer(data, &graph_reducer, &dead_code_elimination);
AddReducer(data, &graph_reducer, &load_elimination); AddReducer(data, &graph_reducer, &common_reducer);
graph_reducer.ReduceGraph(); AddReducer(data, &graph_reducer, &value_numbering);
AddReducer(data, &graph_reducer, &load_elimination);
graph_reducer.ReduceGraph();
}
{
GraphReducer graph_reducer(temp_zone, data->graph(),
&data->info()->tick_counter(), data->broker(),
data->jsgraph()->Dead(),
data->observe_node_manager());
MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph(),
true);
DeadCodeElimination dead_code_elimination(
&graph_reducer, data->graph(), data->common(), temp_zone,
data->info()->concurrent_inlining());
CommonOperatorReducer common_reducer(&graph_reducer, data->graph(),
data->broker(), data->common(),
data->machine(), temp_zone);
ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone());
BranchElimination branch_condition_elimination(
&graph_reducer, data->jsgraph(), temp_zone);
AddReducer(data, &graph_reducer, &machine_reducer);
AddReducer(data, &graph_reducer, &dead_code_elimination);
AddReducer(data, &graph_reducer, &common_reducer);
AddReducer(data, &graph_reducer, &value_numbering);
AddReducer(data, &graph_reducer, &branch_condition_elimination);
graph_reducer.ReduceGraph();
}
} }
}; };
...@@ -2966,7 +3056,7 @@ MaybeHandle<Code> Pipeline::GenerateCodeForCodeStub( ...@@ -2966,7 +3056,7 @@ MaybeHandle<Code> Pipeline::GenerateCodeForCodeStub(
pipeline.Run<PrintGraphPhase>("V8.TFMachineCode"); pipeline.Run<PrintGraphPhase>("V8.TFMachineCode");
} }
pipeline.Run<CsaEarlyOptimizationPhase>(true); pipeline.Run<CsaEarlyOptimizationPhase>();
pipeline.RunPrintAndVerify(CsaEarlyOptimizationPhase::phase_name(), true); pipeline.RunPrintAndVerify(CsaEarlyOptimizationPhase::phase_name(), true);
// Optimize memory access and allocation operations. // Optimize memory access and allocation operations.
...@@ -3183,8 +3273,8 @@ void Pipeline::GenerateCodeForWasmFunction( ...@@ -3183,8 +3273,8 @@ void Pipeline::GenerateCodeForWasmFunction(
const bool is_asm_js = is_asmjs_module(module); const bool is_asm_js = is_asmjs_module(module);
if (FLAG_wasm_opt || is_asm_js) { if (FLAG_wasm_opt || is_asm_js) {
pipeline.Run<CsaEarlyOptimizationPhase>(is_asm_js); pipeline.Run<WasmOptimizationPhase>(is_asm_js);
pipeline.RunPrintAndVerify(CsaEarlyOptimizationPhase::phase_name(), true); pipeline.RunPrintAndVerify(WasmOptimizationPhase::phase_name(), true);
} else { } else {
pipeline.Run<WasmBaseOptimizationPhase>(); pipeline.Run<WasmBaseOptimizationPhase>();
pipeline.RunPrintAndVerify(WasmBaseOptimizationPhase::phase_name(), true); pipeline.RunPrintAndVerify(WasmBaseOptimizationPhase::phase_name(), true);
...@@ -3201,6 +3291,7 @@ void Pipeline::GenerateCodeForWasmFunction( ...@@ -3201,6 +3291,7 @@ void Pipeline::GenerateCodeForWasmFunction(
data.node_origins()->RemoveDecorator(); data.node_origins()->RemoveDecorator();
} }
data.BeginPhaseKind("V8.InstructionSelection");
pipeline.ComputeScheduledGraph(); pipeline.ComputeScheduledGraph();
Linkage linkage(call_descriptor); Linkage linkage(call_descriptor);
......
...@@ -366,6 +366,7 @@ class RuntimeCallTimer final { ...@@ -366,6 +366,7 @@ class RuntimeCallTimer final {
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, SelectInstructions) \ ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, SelectInstructions) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, SimplifiedLowering) \ ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, SimplifiedLowering) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, StoreStoreElimination) \ ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, StoreStoreElimination) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TraceScheduleAndVerify) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TypeAssertions) \ ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TypeAssertions) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TypedLowering) \ ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TypedLowering) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, Typer) \ ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, Typer) \
...@@ -374,6 +375,7 @@ class RuntimeCallTimer final { ...@@ -374,6 +375,7 @@ class RuntimeCallTimer final {
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmBaseOptimization) \ ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmBaseOptimization) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmInlining) \ ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmInlining) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmLoopUnrolling) \ ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmLoopUnrolling) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmOptimization) \
\ \
ADD_THREAD_SPECIFIC_COUNTER(V, Parse, ArrowFunctionLiteral) \ ADD_THREAD_SPECIFIC_COUNTER(V, Parse, ArrowFunctionLiteral) \
ADD_THREAD_SPECIFIC_COUNTER(V, Parse, FunctionLiteral) \ ADD_THREAD_SPECIFIC_COUNTER(V, Parse, FunctionLiteral) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment