Commit b9855173 authored by Manos Koukoutos, committed by V8 LUCI CQ

[wasm][turbofan] Inline before loop unrolling

We switch the order of inlining and loop unrolling optimizations. This
gives small improvements to wasm-gc benchmarks.
Changes:
- Change the loop analysis algorithm to accept loops directly connected
  to the graph's end. This is required because some nodes in an inlined
  function, such as tail calls, might be directly connected to the outer
  function's end without an intervening LoopExit node.
- Based on the above, skip emitting loop exits for some Throw nodes in
  WasmGraphBuildingInterface.
- Introduce WasmInliningPhase, add it before loop unrolling. Remove
  inlining from WasmOptimizationPhase.
- Handle graph terminators in loop unrolling.
- Add loops in the inlined function to the caller's loop_infos.
Drive-by:
- Allow more wasm builtins in unrolled loops.
- Reduce inlining parameters to reflect that functions are now slightly
  smaller during inlining, as no unrolling has taken place yet.

Bug: v8:12166
Change-Id: Iadd6b2f75170aa153ca1efb47fbb0d185c2b8371
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3329783
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Nico Hartmann <nicohartmann@chromium.org>
Commit-Queue: Manos Koukoutos <manoskouk@chromium.org>
Cr-Commit-Position: refs/heads/main@{#78394}
parent a7db5fcb
...@@ -550,40 +550,48 @@ LoopTree* LoopFinder::BuildLoopTree(Graph* graph, TickCounter* tick_counter, ...@@ -550,40 +550,48 @@ LoopTree* LoopFinder::BuildLoopTree(Graph* graph, TickCounter* tick_counter,
#if V8_ENABLE_WEBASSEMBLY #if V8_ENABLE_WEBASSEMBLY
// static // static
ZoneUnorderedSet<Node*>* LoopFinder::FindSmallUnnestedLoopFromHeader( ZoneUnorderedSet<Node*>* LoopFinder::FindSmallInnermostLoopFromHeader(
Node* loop_header, Zone* zone, size_t max_size) { Node* loop_header, Zone* zone, size_t max_size) {
auto* visited = zone->New<ZoneUnorderedSet<Node*>>(zone); auto* visited = zone->New<ZoneUnorderedSet<Node*>>(zone);
std::vector<Node*> queue; std::vector<Node*> queue;
DCHECK(loop_header->opcode() == IrOpcode::kLoop); DCHECK_EQ(loop_header->opcode(), IrOpcode::kLoop);
queue.push_back(loop_header); queue.push_back(loop_header);
#define ENQUEUE_USES(use_name, condition) \
for (Node * use_name : node->uses()) { \
if (condition && visited->count(use_name) == 0) queue.push_back(use_name); \
}
while (!queue.empty()) { while (!queue.empty()) {
Node* node = queue.back(); Node* node = queue.back();
queue.pop_back(); queue.pop_back();
// Terminate is not part of the loop, and neither are its uses. if (node->opcode() == IrOpcode::kEnd) {
if (node->opcode() == IrOpcode::kTerminate) { // We reached the end of the graph. The end node is not part of the loop.
DCHECK_EQ(node->InputAt(1), loop_header);
continue; continue;
} }
visited->insert(node); visited->insert(node);
if (visited->size() > max_size) return nullptr; if (visited->size() > max_size) return nullptr;
switch (node->opcode()) { switch (node->opcode()) {
case IrOpcode::kLoop:
// Found nested loop.
if (node != loop_header) return nullptr;
ENQUEUE_USES(use, true);
break;
case IrOpcode::kLoopExit: case IrOpcode::kLoopExit:
DCHECK_EQ(node->InputAt(1), loop_header); // Found nested loop.
if (node->InputAt(1) != loop_header) return nullptr;
// LoopExitValue/Effect uses are inside the loop. The rest are not. // LoopExitValue/Effect uses are inside the loop. The rest are not.
for (Node* use : node->uses()) { ENQUEUE_USES(use, (use->opcode() == IrOpcode::kLoopExitEffect ||
if (use->opcode() == IrOpcode::kLoopExitEffect || use->opcode() == IrOpcode::kLoopExitValue))
use->opcode() == IrOpcode::kLoopExitValue) {
if (visited->count(use) == 0) queue.push_back(use);
}
}
break; break;
case IrOpcode::kLoopExitEffect: case IrOpcode::kLoopExitEffect:
case IrOpcode::kLoopExitValue: case IrOpcode::kLoopExitValue:
DCHECK_EQ(NodeProperties::GetControlInput(node)->InputAt(1), if (NodeProperties::GetControlInput(node)->InputAt(1) != loop_header) {
loop_header); // Found nested loop.
return nullptr;
}
// All uses are outside the loop, do nothing. // All uses are outside the loop, do nothing.
break; break;
case IrOpcode::kTailCall: case IrOpcode::kTailCall:
...@@ -602,27 +610,31 @@ ZoneUnorderedSet<Node*>* LoopFinder::FindSmallUnnestedLoopFromHeader( ...@@ -602,27 +610,31 @@ ZoneUnorderedSet<Node*>* LoopFinder::FindSmallUnnestedLoopFromHeader(
OpParameter<RelocatablePtrConstantInfo>(callee->op()).value(); OpParameter<RelocatablePtrConstantInfo>(callee->op()).value();
using WasmCode = v8::internal::wasm::WasmCode; using WasmCode = v8::internal::wasm::WasmCode;
constexpr intptr_t unrollable_builtins[] = { constexpr intptr_t unrollable_builtins[] = {
// Exists in every stack check.
WasmCode::kWasmStackGuard, WasmCode::kWasmStackGuard,
WasmCode::kWasmTableGet, // Fast table operations.
WasmCode::kWasmTableSet, WasmCode::kWasmTableGet, WasmCode::kWasmTableSet,
WasmCode::kWasmTableGrow, WasmCode::kWasmTableGrow,
WasmCode::kWasmThrow, // Atomics.
WasmCode::kWasmRethrow, WasmCode::kWasmAtomicNotify, WasmCode::kWasmI32AtomicWait32,
WasmCode::kWasmRethrowExplicitContext, WasmCode::kWasmI32AtomicWait64, WasmCode::kWasmI64AtomicWait32,
WasmCode::kWasmRefFunc, WasmCode::kWasmI64AtomicWait64,
WasmCode::kWasmAllocateRtt, // Exceptions.
WasmCode::kWasmAllocateFixedArray, WasmCode::kWasmThrow,
WasmCode::kWasmRethrow, WasmCode::kWasmRethrowExplicitContext,
// Fast wasm-gc operations.
WasmCode::kWasmRefFunc, WasmCode::kWasmAllocateRtt,
WasmCode::kWasmAllocateFreshRtt}; WasmCode::kWasmAllocateFreshRtt};
if (std::count(unrollable_builtins, if (std::count(unrollable_builtins,
unrollable_builtins + arraysize(unrollable_builtins), unrollable_builtins + arraysize(unrollable_builtins),
info) == 0) { info) == 0) {
return nullptr; return nullptr;
} }
V8_FALLTHROUGH; ENQUEUE_USES(use, true)
break;
} }
default: default:
for (Node* use : node->uses()) { ENQUEUE_USES(use, true)
if (visited->count(use) == 0) queue.push_back(use);
}
break; break;
} }
} }
......
...@@ -180,16 +180,16 @@ class V8_EXPORT_PRIVATE LoopFinder { ...@@ -180,16 +180,16 @@ class V8_EXPORT_PRIVATE LoopFinder {
static bool HasMarkedExits(LoopTree* loop_tree_, const LoopTree::Loop* loop); static bool HasMarkedExits(LoopTree* loop_tree_, const LoopTree::Loop* loop);
#if V8_ENABLE_WEBASSEMBLY #if V8_ENABLE_WEBASSEMBLY
// Find all nodes of a loop given headed by {loop_header}. Returns {nullptr} // Find all nodes in the loop headed by {loop_header} if it contains no nested
// if the loop size in Nodes exceeds {max_size}. In that context, function // loops.
// calls are considered to have unbounded size, so if the loop contains a // Assumption: *if* this loop has no nested loops, all exits from the loop are
// function call, {nullptr} is always returned. // marked with LoopExit, LoopExitEffect, LoopExitValue, or End nodes.
// This is a very restricted version of BuildLoopTree and makes the following // Returns {nullptr} if
// assumptions: // 1) the loop size (in graph nodes) exceeds {max_size},
// 1) All loop exits of the loop are marked with LoopExit, LoopExitEffect, // 2) a function call is found in the loop, excluding calls to a set of wasm
// and LoopExitValue nodes. // builtins,
// 2) There are no nested loops within this loop. // 3) a nested loop is found in the loop.
static ZoneUnorderedSet<Node*>* FindSmallUnnestedLoopFromHeader( static ZoneUnorderedSet<Node*>* FindSmallInnermostLoopFromHeader(
Node* loop_header, Zone* zone, size_t max_size); Node* loop_header, Zone* zone, size_t max_size);
#endif #endif
}; };
......
...@@ -19,8 +19,7 @@ void UnrollLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, uint32_t depth, ...@@ -19,8 +19,7 @@ void UnrollLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, uint32_t depth,
SourcePositionTable* source_positions, SourcePositionTable* source_positions,
NodeOriginTable* node_origins) { NodeOriginTable* node_origins) {
DCHECK_EQ(loop_node->opcode(), IrOpcode::kLoop); DCHECK_EQ(loop_node->opcode(), IrOpcode::kLoop);
DCHECK_NOT_NULL(loop);
if (loop == nullptr) return;
// No back-jump to the loop header means this is not really a loop. // No back-jump to the loop header means this is not really a loop.
if (loop_node->InputCount() < 2) return; if (loop_node->InputCount() < 2) return;
...@@ -41,6 +40,15 @@ void UnrollLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, uint32_t depth, ...@@ -41,6 +40,15 @@ void UnrollLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, uint32_t depth,
source_positions, node_origins); source_positions, node_origins);
source_positions->RemoveDecorator(); source_positions->RemoveDecorator();
// The terminator nodes in the copies need to get connected to the graph's end
// node, except Terminate nodes which will be deleted anyway.
for (Node* node : copies) {
if (IrOpcode::IsGraphTerminator(node->opcode()) &&
node->opcode() != IrOpcode::kTerminate && node->UseCount() == 0) {
NodeProperties::MergeControlToEnd(graph, common, node);
}
}
#define COPY(node, n) copier.map(node, n) #define COPY(node, n) copier.map(node, n)
#define FOREACH_COPY_INDEX(i) for (uint32_t i = 0; i < unrolling_count; i++) #define FOREACH_COPY_INDEX(i) for (uint32_t i = 0; i < unrolling_count; i++)
...@@ -141,6 +149,13 @@ void UnrollLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, uint32_t depth, ...@@ -141,6 +149,13 @@ void UnrollLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, uint32_t depth,
break; break;
} }
case IrOpcode::kTerminate: {
// We only need to keep the Terminate node for the loop header of the
// first iteration.
FOREACH_COPY_INDEX(i) { COPY(node, i)->Kill(); }
break;
}
default: default:
break; break;
} }
......
...@@ -1657,19 +1657,43 @@ struct LoopPeelingPhase { ...@@ -1657,19 +1657,43 @@ struct LoopPeelingPhase {
}; };
#if V8_ENABLE_WEBASSEMBLY #if V8_ENABLE_WEBASSEMBLY
struct WasmInliningPhase {
DECL_PIPELINE_PHASE_CONSTANTS(WasmInlining)
void Run(PipelineData* data, Zone* temp_zone, wasm::CompilationEnv* env,
uint32_t function_index, const wasm::WireBytesStorage* wire_bytes,
std::vector<compiler::WasmLoopInfo>* loop_info) {
if (WasmInliner::any_inlining_impossible(data->graph()->NodeCount())) {
return;
}
GraphReducer graph_reducer(
temp_zone, data->graph(), &data->info()->tick_counter(), data->broker(),
data->jsgraph()->Dead(), data->observe_node_manager());
DeadCodeElimination dead(&graph_reducer, data->graph(), data->common(),
temp_zone);
WasmInliner inliner(&graph_reducer, env, function_index,
data->source_positions(), data->node_origins(),
data->mcgraph(), wire_bytes, loop_info);
AddReducer(data, &graph_reducer, &dead);
AddReducer(data, &graph_reducer, &inliner);
graph_reducer.ReduceGraph();
}
};
struct WasmLoopUnrollingPhase { struct WasmLoopUnrollingPhase {
DECL_PIPELINE_PHASE_CONSTANTS(WasmLoopUnrolling) DECL_PIPELINE_PHASE_CONSTANTS(WasmLoopUnrolling)
void Run(PipelineData* data, Zone* temp_zone, void Run(PipelineData* data, Zone* temp_zone,
std::vector<compiler::WasmLoopInfo>* loop_infos) { std::vector<compiler::WasmLoopInfo>* loop_infos) {
for (WasmLoopInfo& loop_info : *loop_infos) { for (WasmLoopInfo& loop_info : *loop_infos) {
if (loop_info.is_innermost) { if (loop_info.can_be_innermost) {
ZoneUnorderedSet<Node*>* loop = ZoneUnorderedSet<Node*>* loop =
LoopFinder::FindSmallUnnestedLoopFromHeader( LoopFinder::FindSmallInnermostLoopFromHeader(
loop_info.header, temp_zone, loop_info.header, temp_zone,
// Only discover the loop until its size is the maximum unrolled // Only discover the loop until its size is the maximum unrolled
// size for its depth. // size for its depth.
maximum_unrollable_size(loop_info.nesting_depth)); maximum_unrollable_size(loop_info.nesting_depth));
if (loop == nullptr) continue;
UnrollLoop(loop_info.header, loop, loop_info.nesting_depth, UnrollLoop(loop_info.header, loop, loop_info.nesting_depth,
data->graph(), data->common(), temp_zone, data->graph(), data->common(), temp_zone,
data->source_positions(), data->node_origins()); data->source_positions(), data->node_origins());
...@@ -1994,9 +2018,7 @@ struct ScheduledEffectControlLinearizationPhase { ...@@ -1994,9 +2018,7 @@ struct ScheduledEffectControlLinearizationPhase {
struct WasmOptimizationPhase { struct WasmOptimizationPhase {
DECL_PIPELINE_PHASE_CONSTANTS(WasmOptimization) DECL_PIPELINE_PHASE_CONSTANTS(WasmOptimization)
void Run(PipelineData* data, Zone* temp_zone, bool allow_signalling_nan, void Run(PipelineData* data, Zone* temp_zone, bool allow_signalling_nan) {
wasm::CompilationEnv* env, uint32_t function_index,
const wasm::WireBytesStorage* wire_bytes) {
// Run optimizations in two rounds: First one around load elimination and // Run optimizations in two rounds: First one around load elimination and
// then one around branch elimination. This is because those two // then one around branch elimination. This is because those two
// optimizations sometimes display quadratic complexity when run together. // optimizations sometimes display quadratic complexity when run together.
...@@ -2016,9 +2038,6 @@ struct WasmOptimizationPhase { ...@@ -2016,9 +2038,6 @@ struct WasmOptimizationPhase {
ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone()); ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone());
CsaLoadElimination load_elimination(&graph_reducer, data->jsgraph(), CsaLoadElimination load_elimination(&graph_reducer, data->jsgraph(),
temp_zone); temp_zone);
WasmInliner inliner(&graph_reducer, env, function_index,
data->source_positions(), data->node_origins(),
data->mcgraph(), wire_bytes);
WasmEscapeAnalysis escape(&graph_reducer, data->mcgraph()); WasmEscapeAnalysis escape(&graph_reducer, data->mcgraph());
AddReducer(data, &graph_reducer, &machine_reducer); AddReducer(data, &graph_reducer, &machine_reducer);
AddReducer(data, &graph_reducer, &dead_code_elimination); AddReducer(data, &graph_reducer, &dead_code_elimination);
...@@ -2028,10 +2047,6 @@ struct WasmOptimizationPhase { ...@@ -2028,10 +2047,6 @@ struct WasmOptimizationPhase {
AddReducer(data, &graph_reducer, &load_elimination); AddReducer(data, &graph_reducer, &load_elimination);
AddReducer(data, &graph_reducer, &escape); AddReducer(data, &graph_reducer, &escape);
} }
if (FLAG_wasm_inlining &&
!WasmInliner::any_inlining_impossible(data->graph()->NodeCount())) {
AddReducer(data, &graph_reducer, &inliner);
}
graph_reducer.ReduceGraph(); graph_reducer.ReduceGraph();
} }
{ {
...@@ -3229,6 +3244,11 @@ void Pipeline::GenerateCodeForWasmFunction( ...@@ -3229,6 +3244,11 @@ void Pipeline::GenerateCodeForWasmFunction(
pipeline.RunPrintAndVerify("V8.WasmMachineCode", true); pipeline.RunPrintAndVerify("V8.WasmMachineCode", true);
data.BeginPhaseKind("V8.WasmOptimization"); data.BeginPhaseKind("V8.WasmOptimization");
if (FLAG_wasm_inlining) {
pipeline.Run<WasmInliningPhase>(env, function_index, wire_bytes_storage,
loop_info);
pipeline.RunPrintAndVerify(WasmInliningPhase::phase_name(), true);
}
if (FLAG_wasm_loop_unrolling) { if (FLAG_wasm_loop_unrolling) {
pipeline.Run<WasmLoopUnrollingPhase>(loop_info); pipeline.Run<WasmLoopUnrollingPhase>(loop_info);
pipeline.RunPrintAndVerify(WasmLoopUnrollingPhase::phase_name(), true); pipeline.RunPrintAndVerify(WasmLoopUnrollingPhase::phase_name(), true);
...@@ -3236,8 +3256,7 @@ void Pipeline::GenerateCodeForWasmFunction( ...@@ -3236,8 +3256,7 @@ void Pipeline::GenerateCodeForWasmFunction(
const bool is_asm_js = is_asmjs_module(module); const bool is_asm_js = is_asmjs_module(module);
if (FLAG_wasm_opt || is_asm_js) { if (FLAG_wasm_opt || is_asm_js) {
pipeline.Run<WasmOptimizationPhase>(is_asm_js, env, function_index, pipeline.Run<WasmOptimizationPhase>(is_asm_js);
wire_bytes_storage);
pipeline.RunPrintAndVerify(WasmOptimizationPhase::phase_name(), true); pipeline.RunPrintAndVerify(WasmOptimizationPhase::phase_name(), true);
} else { } else {
pipeline.Run<WasmBaseOptimizationPhase>(); pipeline.Run<WasmBaseOptimizationPhase>();
......
...@@ -183,12 +183,14 @@ struct WasmInstanceCacheNodes { ...@@ -183,12 +183,14 @@ struct WasmInstanceCacheNodes {
struct WasmLoopInfo { struct WasmLoopInfo {
Node* header; Node* header;
uint32_t nesting_depth; uint32_t nesting_depth;
bool is_innermost; // This loop has, to our best knowledge, no other loops nested within it. A
// loop can obtain inner loops despite this after inlining.
bool can_be_innermost;
WasmLoopInfo(Node* header, uint32_t nesting_depth, bool is_innermost) WasmLoopInfo(Node* header, uint32_t nesting_depth, bool can_be_innermost)
: header(header), : header(header),
nesting_depth(nesting_depth), nesting_depth(nesting_depth),
is_innermost(is_innermost) {} can_be_innermost(can_be_innermost) {}
}; };
// Abstracts details of building TurboFan graph nodes for wasm to separate // Abstracts details of building TurboFan graph nodes for wasm to separate
......
...@@ -151,20 +151,20 @@ void WasmInliner::Finalize() { ...@@ -151,20 +151,20 @@ void WasmInliner::Finalize() {
wasm::WasmFeatures detected; wasm::WasmFeatures detected;
WasmGraphBuilder builder(env_, zone(), mcgraph_, inlinee_body.sig, WasmGraphBuilder builder(env_, zone(), mcgraph_, inlinee_body.sig,
source_positions_); source_positions_);
std::vector<WasmLoopInfo> infos; std::vector<WasmLoopInfo> inlinee_loop_infos;
size_t subgraph_min_node_id = graph()->NodeCount(); size_t subgraph_min_node_id = graph()->NodeCount();
Node* inlinee_start; Node* inlinee_start;
Node* inlinee_end; Node* inlinee_end;
{ {
Graph::SubgraphScope scope(graph()); Graph::SubgraphScope scope(graph());
wasm::DecodeResult result = wasm::DecodeResult result = wasm::BuildTFGraph(
wasm::BuildTFGraph(zone()->allocator(), env_->enabled_features, zone()->allocator(), env_->enabled_features, module(), &builder,
module(), &builder, &detected, inlinee_body, &detected, inlinee_body, &inlinee_loop_infos, node_origins_,
&infos, node_origins_, candidate.inlinee_index, candidate.inlinee_index,
NodeProperties::IsExceptionalCall(call) NodeProperties::IsExceptionalCall(call)
? wasm::kInlinedHandledCall ? wasm::kInlinedHandledCall
: wasm::kInlinedNonHandledCall); : wasm::kInlinedNonHandledCall);
if (result.failed()) { if (result.failed()) {
// This can happen if the inlinee has never been compiled before and is // This can happen if the inlinee has never been compiled before and is
// invalid. Return, as there is no point to keep optimizing. // invalid. Return, as there is no point to keep optimizing.
...@@ -196,6 +196,8 @@ void WasmInliner::Finalize() { ...@@ -196,6 +196,8 @@ void WasmInliner::Finalize() {
InlineTailCall(call, inlinee_start, inlinee_end); InlineTailCall(call, inlinee_start, inlinee_end);
} }
call->Kill(); call->Kill();
loop_infos_->insert(loop_infos_->end(), inlinee_loop_infos.begin(),
inlinee_loop_infos.end());
// Returning after only one inlining has been tried and found worse. // Returning after only one inlining has been tried and found worse.
} }
} }
......
...@@ -29,17 +29,17 @@ namespace compiler { ...@@ -29,17 +29,17 @@ namespace compiler {
class NodeOriginTable; class NodeOriginTable;
class SourcePositionTable; class SourcePositionTable;
struct WasmLoopInfo;
// The WasmInliner provides the core graph inlining machinery for Webassembly // The WasmInliner provides the core graph inlining machinery for Webassembly
// graphs. Note that this class only deals with the mechanics of how to inline // graphs.
// one graph into another; heuristics that decide what and how much to inline
// are provided by {WasmInliningHeuristics}.
class WasmInliner final : public AdvancedReducer { class WasmInliner final : public AdvancedReducer {
public: public:
WasmInliner(Editor* editor, wasm::CompilationEnv* env, WasmInliner(Editor* editor, wasm::CompilationEnv* env,
uint32_t function_index, SourcePositionTable* source_positions, uint32_t function_index, SourcePositionTable* source_positions,
NodeOriginTable* node_origins, MachineGraph* mcgraph, NodeOriginTable* node_origins, MachineGraph* mcgraph,
const wasm::WireBytesStorage* wire_bytes) const wasm::WireBytesStorage* wire_bytes,
std::vector<WasmLoopInfo>* loop_infos)
: AdvancedReducer(editor), : AdvancedReducer(editor),
env_(env), env_(env),
function_index_(function_index), function_index_(function_index),
...@@ -47,6 +47,7 @@ class WasmInliner final : public AdvancedReducer { ...@@ -47,6 +47,7 @@ class WasmInliner final : public AdvancedReducer {
node_origins_(node_origins), node_origins_(node_origins),
mcgraph_(mcgraph), mcgraph_(mcgraph),
wire_bytes_(wire_bytes), wire_bytes_(wire_bytes),
loop_infos_(loop_infos),
initial_graph_size_(mcgraph->graph()->NodeCount()), initial_graph_size_(mcgraph->graph()->NodeCount()),
current_graph_size_(initial_graph_size_), current_graph_size_(initial_graph_size_),
inlining_candidates_() {} inlining_candidates_() {}
...@@ -143,6 +144,7 @@ class WasmInliner final : public AdvancedReducer { ...@@ -143,6 +144,7 @@ class WasmInliner final : public AdvancedReducer {
NodeOriginTable* const node_origins_; NodeOriginTable* const node_origins_;
MachineGraph* const mcgraph_; MachineGraph* const mcgraph_;
const wasm::WireBytesStorage* const wire_bytes_; const wasm::WireBytesStorage* const wire_bytes_;
std::vector<WasmLoopInfo>* const loop_infos_;
const size_t initial_graph_size_; const size_t initial_graph_size_;
size_t current_graph_size_; size_t current_graph_size_;
std::priority_queue<CandidateInfo, std::vector<CandidateInfo>, std::priority_queue<CandidateInfo, std::vector<CandidateInfo>,
......
...@@ -1084,9 +1084,9 @@ DEFINE_BOOL( ...@@ -1084,9 +1084,9 @@ DEFINE_BOOL(
wasm_inlining, false, wasm_inlining, false,
"enable inlining of wasm functions into wasm functions (experimental)") "enable inlining of wasm functions into wasm functions (experimental)")
DEFINE_SIZE_T( DEFINE_SIZE_T(
wasm_inlining_budget_factor, 100000, wasm_inlining_budget_factor, 75000,
"maximum allowed size to inline a function is given by {n / caller size}") "maximum allowed size to inline a function is given by {n / caller size}")
DEFINE_SIZE_T(wasm_inlining_max_size, 1250, DEFINE_SIZE_T(wasm_inlining_max_size, 1000,
"maximum size of a function that can be inlined, in TF nodes") "maximum size of a function that can be inlined, in TF nodes")
DEFINE_BOOL(wasm_speculative_inlining, false, DEFINE_BOOL(wasm_speculative_inlining, false,
"enable speculative inlining of call_ref targets (experimental)") "enable speculative inlining of call_ref targets (experimental)")
......
...@@ -212,14 +212,14 @@ class WasmGraphBuildingInterface { ...@@ -212,14 +212,14 @@ class WasmGraphBuildingInterface {
nesting_depth++; nesting_depth++;
} }
} }
// If this loop is nested, the parent loop's is_innermost field needs to // If this loop is nested, the parent loop's can_be_innermost field needs
// be false. If the last loop in loop_infos_ has less depth, it has to be // to be false. If the last loop in loop_infos_ has less depth, it has to
// the parent loop. If it does not, it means another loop has been found // be the parent loop. If it does not, it means another loop has been
// within the parent loop, and that loop will have set the parent's // found within the parent loop, and that loop will have set the parent's
// is_innermost to false, so we do not need to do anything. // can_be_innermost to false, so we do not need to do anything.
if (nesting_depth > 0 && if (nesting_depth > 0 &&
loop_infos_.back().nesting_depth < nesting_depth) { loop_infos_.back().nesting_depth < nesting_depth) {
loop_infos_.back().is_innermost = false; loop_infos_.back().can_be_innermost = false;
} }
loop_infos_.emplace_back(loop_node, nesting_depth, true); loop_infos_.emplace_back(loop_node, nesting_depth, true);
} }
...@@ -442,11 +442,6 @@ class WasmGraphBuildingInterface { ...@@ -442,11 +442,6 @@ class WasmGraphBuildingInterface {
} }
void Trap(FullDecoder* decoder, TrapReason reason) { void Trap(FullDecoder* decoder, TrapReason reason) {
ValueVector values;
if (emit_loop_exits()) {
BuildNestedLoopExits(decoder, decoder->control_depth() - 1, false,
values);
}
builder_->Trap(reason, decoder->position()); builder_->Trap(reason, decoder->position());
} }
...@@ -845,7 +840,7 @@ class WasmGraphBuildingInterface { ...@@ -845,7 +840,7 @@ class WasmGraphBuildingInterface {
CheckForException(decoder, CheckForException(decoder,
builder_->Throw(imm.index, imm.tag, base::VectorOf(args), builder_->Throw(imm.index, imm.tag, base::VectorOf(args),
decoder->position())); decoder->position()));
TerminateThrow(decoder); builder_->TerminateThrow(effect(), control());
} }
void Rethrow(FullDecoder* decoder, Control* block) { void Rethrow(FullDecoder* decoder, Control* block) {
...@@ -853,7 +848,7 @@ class WasmGraphBuildingInterface { ...@@ -853,7 +848,7 @@ class WasmGraphBuildingInterface {
TFNode* exception = block->try_info->exception; TFNode* exception = block->try_info->exception;
DCHECK_NOT_NULL(exception); DCHECK_NOT_NULL(exception);
CheckForException(decoder, builder_->Rethrow(exception)); CheckForException(decoder, builder_->Rethrow(exception));
TerminateThrow(decoder); builder_->TerminateThrow(effect(), control());
} }
void CatchException(FullDecoder* decoder, void CatchException(FullDecoder* decoder,
...@@ -910,7 +905,7 @@ class WasmGraphBuildingInterface { ...@@ -910,7 +905,7 @@ class WasmGraphBuildingInterface {
// We just throw to the caller here, so no need to generate IfSuccess // We just throw to the caller here, so no need to generate IfSuccess
// and IfFailure nodes. // and IfFailure nodes.
builder_->Rethrow(block->try_info->exception); builder_->Rethrow(block->try_info->exception);
TerminateThrow(decoder); builder_->TerminateThrow(effect(), control());
return; return;
} }
DCHECK(decoder->control_at(depth)->is_try()); DCHECK(decoder->control_at(depth)->is_try());
...@@ -1064,7 +1059,7 @@ class WasmGraphBuildingInterface { ...@@ -1064,7 +1059,7 @@ class WasmGraphBuildingInterface {
rtt.node, decoder->position()); rtt.node, decoder->position());
// array.new_with_rtt introduces a loop. Therefore, we have to mark the // array.new_with_rtt introduces a loop. Therefore, we have to mark the
// immediately nesting loop (if any) as non-innermost. // immediately nesting loop (if any) as non-innermost.
if (!loop_infos_.empty()) loop_infos_.back().is_innermost = false; if (!loop_infos_.empty()) loop_infos_.back().can_be_innermost = false;
} }
void ArrayNewDefault(FullDecoder* decoder, void ArrayNewDefault(FullDecoder* decoder,
...@@ -1304,13 +1299,18 @@ class WasmGraphBuildingInterface { ...@@ -1304,13 +1299,18 @@ class WasmGraphBuildingInterface {
->try_info; ->try_info;
} }
// Loop exits are only used during loop unrolling and are then removed, as // If {emit_loop_exits()} returns true, we need to emit LoopExit,
// they cannot be handled by later optimization stages. Since unrolling comes // LoopExitEffect, and LoopExit nodes whenever a control resp. effect resp.
// before inlining in the compilation pipeline, we should not emit loop exits // value escapes a loop. We emit loop exits in the following cases:
// in inlined functions. Also, we should not do so when unrolling is disabled. // - When popping the control of a loop.
bool emit_loop_exits() { // - At some nodes which connect to the graph's end. We do not always need to
return FLAG_wasm_loop_unrolling && inlined_status_ == kRegularFunction; // emit loop exits for such nodes, since the wasm loop analysis algorithm
} // can handle a loop body which connects directly to the graph's end.
// However, we need to emit them anyway for nodes that may be rewired to
// different nodes during inlining. These are Return and TailCall nodes.
// - After IfFailure nodes.
// - When exiting a loop through Delegate.
bool emit_loop_exits() { return FLAG_wasm_loop_unrolling; }
void GetNodes(TFNode** nodes, Value* values, size_t count) { void GetNodes(TFNode** nodes, Value* values, size_t count) {
for (size_t i = 0; i < count; ++i) { for (size_t i = 0; i < count; ++i) {
...@@ -1811,21 +1811,6 @@ class WasmGraphBuildingInterface { ...@@ -1811,21 +1811,6 @@ class WasmGraphBuildingInterface {
} }
} }
void TerminateThrow(FullDecoder* decoder) {
if (emit_loop_exits()) {
SsaEnv* internal_env = ssa_env_;
SsaEnv* exit_env = Split(decoder->zone(), ssa_env_);
SetEnv(exit_env);
ValueVector stack_values;
BuildNestedLoopExits(decoder, decoder->control_depth(), false,
stack_values);
builder_->TerminateThrow(effect(), control());
SetEnv(internal_env);
} else {
builder_->TerminateThrow(effect(), control());
}
}
CheckForNull NullCheckFor(ValueType type) { CheckForNull NullCheckFor(ValueType type) {
DCHECK(type.is_object_reference()); DCHECK(type.is_object_reference());
return (!FLAG_experimental_wasm_skip_null_checks && type.is_nullable()) return (!FLAG_experimental_wasm_skip_null_checks && type.is_nullable())
......
...@@ -252,7 +252,6 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js"); ...@@ -252,7 +252,6 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
assertEquals(20, instance.exports.main(10, 20)); assertEquals(20, instance.exports.main(10, 20));
})(); })();
// Tests that no LoopExits are emitted in the inlined function.
(function LoopUnrollingTest() { (function LoopUnrollingTest() {
print(arguments.callee.name); print(arguments.callee.name);
let builder = new WasmModuleBuilder(); let builder = new WasmModuleBuilder();
...@@ -278,6 +277,46 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js"); ...@@ -278,6 +277,46 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
assertEquals(25, instance.exports.main(10)); assertEquals(25, instance.exports.main(10));
})(); })();
(function ThrowInLoopTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
let tag = builder.addTag(kSig_v_i);
// f(x, y) {
// do {
// if (x < 0) throw x;
// y++; x--;
// } while (x > 0);
// return y;
// }
let callee = builder.addFunction("callee", kSig_i_ii)
.addBody([
kExprLoop, kWasmVoid,
kExprLocalGet, 0, kExprI32Const, 0, kExprI32LtS,
kExprIf, kWasmVoid,
kExprLocalGet, 0, kExprThrow, tag,
kExprEnd,
kExprLocalGet, 1, kExprI32Const, 1, kExprI32Add, kExprLocalSet, 1,
kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub, kExprLocalSet, 0,
kExprLocalGet, 0, kExprI32Const, 0, kExprI32GtS, kExprBrIf, 0,
kExprEnd,
kExprLocalGet, 1
]);
// g(x) = (try { f(x, 5) } catch(x) { x }) + x
builder.addFunction("main", kSig_i_i)
.addBody([kExprTry, kWasmI32,
kExprLocalGet, 0, kExprI32Const, 5,
kExprCallFunction, callee.index,
kExprCatch, tag,
kExprEnd,
kExprLocalGet, 0, kExprI32Add])
.exportAs("main");
let instance = builder.instantiate();
assertEquals(25, instance.exports.main(10));
assertEquals(-20, instance.exports.main(-10));
})();
(function InlineSubtypeSignatureTest() { (function InlineSubtypeSignatureTest() {
print(arguments.callee.name); print(arguments.callee.name);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment