Commit 40ebe845, authored by Manos Koukoutos and committed by Commit Bot

[wasm][turbofan] Implement loop unrolling for wasm

Design doc: https://docs.google.com/document/d/1AsUCqslMUB6fLdnGq0ZoPk2kn50jIJAWAL77lKXXP5g/

Currently, wasm loop unrolling is disabled by default. We intend to
further investigate its compilation time cost and running time benefits
before enabling it.

Additional changes:
- Introduce LoopFinder::FindUnnestedLoopFromHeader() as a lightweight
  loop analysis.
- Move EliminateLoopExit into LoopPeeling and expose it.
- Introduce loop_info_ field into WasmGraphBuildingInterface, fill it
  up in Loop().
- Break after encountering the first loop in BuildNestedLoopExits.
- Introduce struct WasmLoopInfo. A WasmLoopInfo vector is instantiated
  in ExecuteTurbofanWasmCompilation, passed to BuildGraphForWasmFunction
  to be filled up by WasmGraphBuildingInterface, and then passed to
  GenerateCodeForWasmFunction to be used in WasmLoopUnrollingPhase.
- Introduce WasmLoopUnrollingPhase and insert it into the wasm
  compilation pipeline.
- Fix an issue where exception values were not wrapped in LoopExitValue
  nodes at loop exits in WasmGraphBuildingInterface.
- Update --wasm-loop-unrolling flag description.

Bug: v8:11298
Change-Id: I4b57cf2ea8520931f60769f843ffd57b3ca6399b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2697349
Commit-Queue: Manos Koukoutos <manoskouk@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Reviewed-by: Nico Hartmann <nicohartmann@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73009}
parent 80635217
......@@ -2322,6 +2322,8 @@ v8_compiler_sources = [
"src/compiler/loop-analysis.h",
"src/compiler/loop-peeling.cc",
"src/compiler/loop-peeling.h",
"src/compiler/loop-unrolling.cc",
"src/compiler/loop-unrolling.h",
"src/compiler/loop-variable-optimizer.cc",
"src/compiler/loop-variable-optimizer.h",
"src/compiler/machine-graph-verifier.cc",
......
......@@ -39,7 +39,6 @@ struct TempLoopInfo {
LoopTree::Loop* loop;
};
// Encapsulation of the loop finding algorithm.
// -----------------------------------------------------------------------------
// Conceptually, the contents of a loop are those nodes that are "between" the
......@@ -54,6 +53,8 @@ struct TempLoopInfo {
// 1 bit per loop per node per direction are required during the marking phase.
// To handle nested loops correctly, the algorithm must filter some reachability
// marks on edges into/out-of the loop header nodes.
// Note: this algorithm assumes there are no unreachable loop header nodes
// (including loop phis).
class LoopFinderImpl {
public:
LoopFinderImpl(Graph* graph, LoopTree* loop_tree, TickCounter* tick_counter,
......@@ -542,6 +543,53 @@ LoopTree* LoopFinder::BuildLoopTree(Graph* graph, TickCounter* tick_counter,
return loop_tree;
}
// Collects the nodes that belong to the loop headed by {loop_header} by
// following use edges, starting from the header itself. The walk stops at the
// loop's exit markers:
//  - Terminate nodes are skipped and their uses are not followed.
//  - For a LoopExit, only its LoopExitEffect/LoopExitValue uses are followed.
//  - For a LoopExitEffect/LoopExitValue, no uses are followed.
// The returned set is allocated in {zone} and always contains {loop_header}.
ZoneUnorderedSet<Node*>* LoopFinder::FindUnnestedLoopFromHeader(
    Node* loop_header, Zone* zone) {
  DCHECK_EQ(loop_header->opcode(), IrOpcode::kLoop);
  auto* visited = zone->New<ZoneUnorderedSet<Node*>>(zone);
  // LIFO worklist of nodes whose uses still have to be explored.
  std::vector<Node*> worklist;
  worklist.push_back(loop_header);
  while (!worklist.empty()) {
    Node* node = worklist.back();
    worklist.pop_back();
    // Terminate is not part of the loop, and neither are its uses.
    if (node->opcode() == IrOpcode::kTerminate) {
      DCHECK_EQ(node->InputAt(1), loop_header);
      continue;
    }
    // A node may have been pushed by several of its inputs before it was
    // popped for the first time. Its uses were already enqueued back then, so
    // scanning them again would only produce redundant work.
    if (visited->count(node) != 0) continue;
    visited->insert(node);
    switch (node->opcode()) {
      case IrOpcode::kLoopExit:
        DCHECK_EQ(node->InputAt(1), loop_header);
        // LoopExitValue/Effect uses are inside the loop. The rest are not.
        for (Node* use : node->uses()) {
          const bool is_exit_marker =
              use->opcode() == IrOpcode::kLoopExitEffect ||
              use->opcode() == IrOpcode::kLoopExitValue;
          if (is_exit_marker && visited->count(use) == 0) {
            worklist.push_back(use);
          }
        }
        break;
      case IrOpcode::kLoopExitEffect:
      case IrOpcode::kLoopExitValue:
        DCHECK_EQ(NodeProperties::GetControlInput(node)->InputAt(1),
                  loop_header);
        // All uses are outside the loop, do nothing.
        break;
      default:
        for (Node* use : node->uses()) {
          if (visited->count(use) == 0) worklist.push_back(use);
        }
        break;
    }
  }
  return visited;
}
bool LoopFinder::HasMarkedExits(LoopTree* loop_tree,
const LoopTree::Loop* loop) {
// Look for returns and if projections that are outside the loop but whose
......
......@@ -178,6 +178,15 @@ class V8_EXPORT_PRIVATE LoopFinder {
Zone* temp_zone);
static bool HasMarkedExits(LoopTree* loop_tree_, const LoopTree::Loop* loop);
// Find all nodes of a loop given its header node. This is much more
// restricted than BuildLoopTree.
// Assumptions:
// 1) All loop exits of the loop are marked with LoopExit, LoopExitEffect,
// and LoopExitValue nodes.
// 2) There are no nested loops within this loop.
static ZoneUnorderedSet<Node*>* FindUnnestedLoopFromHeader(Node* loop_header,
Zone* zone);
};
// Copies a range of nodes any number of times.
......
......@@ -236,9 +236,7 @@ void LoopPeeler::PeelInnerLoops(LoopTree::Loop* loop) {
Peel(loop);
}
namespace {
void EliminateLoopExit(Node* node) {
void LoopPeeler::EliminateLoopExit(Node* node) {
DCHECK_EQ(IrOpcode::kLoopExit, node->opcode());
// The exit markers take the loop exit as input. We iterate over uses
// and remove all the markers from the graph.
......@@ -260,8 +258,6 @@ void EliminateLoopExit(Node* node) {
node->Kill();
}
} // namespace
void LoopPeeler::PeelInnerLoopsOfTree() {
for (LoopTree::Loop* loop : loop_tree_->outer_loops()) {
PeelInnerLoops(loop);
......
......@@ -50,6 +50,7 @@ class V8_EXPORT_PRIVATE LoopPeeler {
void PeelInnerLoopsOfTree();
static void EliminateLoopExits(Graph* graph, Zone* tmp_zone);
static void EliminateLoopExit(Node* loop);
static const size_t kMaxPeeledNodes = 1000;
private:
......
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/loop-unrolling.h"
#include "src/base/small-vector.h"
#include "src/codegen/tick-counter.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/loop-analysis.h"
#include "src/compiler/loop-peeling.h"
namespace v8 {
namespace internal {
namespace compiler {
// A simple heuristic to decide how many times to unroll a loop. Favors small
// and deeply nested loops.
// {size} is the number of nodes in the loop, {depth} its nesting depth (0 for
// a loop that is not nested in another loop). Returns a value in
// [0, kMaximumUnrollingCount]; 0 means "do not unroll".
// TODO(manoskouk): Investigate how this can be improved.
V8_INLINE uint32_t unrolling_count_heuristic(uint32_t size, uint32_t depth) {
  static constexpr uint32_t kMaximumUnnestedSize = 50;
  static constexpr uint32_t kMaximumUnrollingCount = 7;
  // An empty loop has nothing to copy. This also keeps the division below
  // from dividing by zero.
  if (size == 0) return 0;
  // Compute the size budget in 64 bits so that neither {depth + 1} nor the
  // multiplication can wrap around for very large depths.
  const uint64_t budget =
      (static_cast<uint64_t>(depth) + 1) * kMaximumUnnestedSize;
  return static_cast<uint32_t>(
      std::min<uint64_t>(budget / size, kMaximumUnrollingCount));
}
// Unrolls the loop whose header is {loop_node} and whose nodes are listed in
// {loop}. {depth} is forwarded to unrolling_count_heuristic() to pick the
// number of extra copies of the loop body.
// Returns without touching the graph if {loop_node} has fewer than two inputs
// or if the heuristic yields 0.
// Otherwise the loop nodes are copied {unrolling_count} times and the graph is
// patched in three steps:
//   1) In the copies only, uses of StackPointerGreaterThan are rewritten.
//   2) LoopExits of the original and of all copies are joined by a Merge, and
//      their LoopExitEffect/LoopExitValue uses by (Effect)Phis on that Merge.
//   3) Loop headers and header phis are rewired so that every iteration feeds
//      the next one and the last copy feeds the original header.
// {tmp_zone} backs the temporary arrays; {source_positions} and {node_origins}
// are passed through to NodeCopier::CopyNodes.
void UnrollLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, uint32_t depth,
Graph* graph, CommonOperatorBuilder* common, Zone* tmp_zone,
SourcePositionTable* source_positions,
NodeOriginTable* node_origins) {
DCHECK_EQ(loop_node->opcode(), IrOpcode::kLoop);
// No back-jump to the loop header means this is not really a loop.
if (loop_node->InputCount() < 2) return;
// Number of additional copies of the loop body that will be created.
uint32_t unrolling_count =
unrolling_count_heuristic(static_cast<uint32_t>(loop->size()), depth);
if (unrolling_count == 0) return;
// The original body plus all of its copies.
uint32_t iteration_count = unrolling_count + 1;
uint32_t copied_size = static_cast<uint32_t>(loop->size()) * iteration_count;
NodeVector copies(tmp_zone);
NodeCopier copier(graph, copied_size, &copies, unrolling_count);
{
// NodeRange is built from a pair of pointers, so the set is flattened into a
// vector that only lives for the duration of the copy.
std::vector<Node*> loop_nodes(loop->begin(), loop->end());
copier.CopyNodes(
graph, tmp_zone, graph->NewNode(common->Dead()),
NodeRange(loop_nodes.data(), loop_nodes.data() + loop_nodes.size()),
source_positions, node_origins);
}
// COPY(node, n) looks up, through copier.map, the node that corresponds to
// {node} in copy number n; n ranges over [0, unrolling_count).
#define COPY(node, n) copier.map(node, n)
// Iterates {i} over the indices of all copies.
#define FOREACH_COPY_INDEX(i) for (uint32_t i = 0; i < unrolling_count; i++)
for (Node* node : *loop) {
switch (node->opcode()) {
case IrOpcode::kStackPointerGreaterThan: {
/*** Step 1: Remove stack checks from all but the first iteration of the
loop. ***/
for (Edge edge : node->use_edges()) {
if (edge.from()->opcode() == IrOpcode::kBranch) {
// In every copy, input 0 of the branch on the stack check is replaced by
// a fresh Int32Constant(1). The original branch is left untouched.
FOREACH_COPY_INDEX(i) {
COPY(edge.from(), i)
->ReplaceInput(0, graph->NewNode(common->Int32Constant(1)));
}
} else if (edge.from()->opcode() == IrOpcode::kEffectPhi) {
// We now need to remove stack check and the related function call
// from the effect chain.
// The effect chain looks like this (* stand for irrelevant nodes):
//
// replacing effect (effect before stack check)
// * * | *
// | | | |
// ( Load )
// * * | *
// | | | |
// ( Load )
// | |
// stack check
// | * | *
// | | | |
// | (call)
// | | *
// | | |
// stack check effect (that we need to replace)
Node* stack_check_effect = edge.from();
DCHECK_EQ(edge.index(), 0);
DCHECK_EQ(stack_check_effect->InputAt(1)->opcode(),
IrOpcode::kCall);
DCHECK_EQ(stack_check_effect->InputAt(1)->InputAt(1), node);
DCHECK_EQ(node->InputAt(1)->opcode(), IrOpcode::kLoad);
DCHECK_EQ(node->InputAt(1)->InputAt(2)->opcode(), IrOpcode::kLoad);
// Input 2 of the second Load, i.e. the "replacing effect" in the diagram.
Node* replacing_effect = node->InputAt(1)->InputAt(2)->InputAt(2);
// Only the copies are rewritten; the original effect phi keeps its uses.
FOREACH_COPY_INDEX(i) {
COPY(stack_check_effect, i)
->ReplaceUses(COPY(replacing_effect, i));
}
}
}
break;
}
case IrOpcode::kLoopExit: {
/*** Step 2: Create merges for loop exits. ***/
// Only exits whose input 1 is this loop's header are handled.
if (node->InputAt(1) == loop_node) {
// Create a merge node from all iteration exits.
// Input 0 is the original exit, inputs 1.. are the exits of the copies.
Node** merge_inputs = tmp_zone->NewArray<Node*>(iteration_count);
merge_inputs[0] = node;
for (uint32_t i = 1; i < iteration_count; i++) {
merge_inputs[i] = COPY(node, i - 1);
}
Node* merge_node = graph->NewNode(common->Merge(iteration_count),
iteration_count, merge_inputs);
// Replace all uses of the loop exit with the merge node.
for (Edge use_edge : node->use_edges()) {
Node* use = use_edge.from();
if (loop->count(use) == 1) {
// Uses within the loop will be LoopExitEffects and
// LoopExitValues. We need to create a phi from all loop
// iterations. Its merge will be the merge node for LoopExits.
const Operator* phi_operator;
if (use->opcode() == IrOpcode::kLoopExitEffect) {
phi_operator = common->EffectPhi(iteration_count);
} else {
DCHECK(use->opcode() == IrOpcode::kLoopExitValue);
phi_operator = common->Phi(
LoopExitValueRepresentationOf(use->op()), iteration_count);
}
// One input per iteration, followed by the merge as the last input.
Node** phi_inputs =
tmp_zone->NewArray<Node*>(iteration_count + 1);
phi_inputs[0] = use;
for (uint32_t i = 1; i < iteration_count; i++) {
phi_inputs[i] = COPY(use, i - 1);
}
phi_inputs[iteration_count] = merge_node;
Node* phi =
graph->NewNode(phi_operator, iteration_count + 1, phi_inputs);
// ReplaceUses also redirects the new phi's own input 0, which is {use}.
use->ReplaceUses(phi);
// Repair phi which we just broke.
phi->ReplaceInput(0, use);
} else if (use != merge_node) {
// For uses outside the loop, simply redirect them to the merge.
// The merge itself is skipped because it has to keep {node} as an input.
use->ReplaceInput(use_edge.index(), merge_node);
}
}
}
break;
}
default:
break;
}
}
/*** Step 3: Rewire the iterations of the loop. Each iteration should flow
into the next one, and the last should flow into the first. ***/
// 3a) Rewire control.
// We start at index=1 assuming that index=0 is the (non-recursive) loop
// entry.
for (int input_index = 1; input_index < loop_node->InputCount();
input_index++) {
// Remember the last copy's input before it is overwritten below.
Node* last_iteration_input =
COPY(loop_node, unrolling_count - 1)->InputAt(input_index);
// Shift the inputs by one iteration, walking from the last copy down to
// copy 1: copy k receives what copy k-1 had.
for (uint32_t copy_index = unrolling_count - 1; copy_index > 0;
copy_index--) {
COPY(loop_node, copy_index)
->ReplaceInput(input_index,
COPY(loop_node, copy_index - 1)->InputAt(input_index));
}
// Copy 0 receives the original header's input, and the original header
// receives the input saved from the last copy.
COPY(loop_node, 0)
->ReplaceInput(input_index, loop_node->InputAt(input_index));
loop_node->ReplaceInput(input_index, last_iteration_input);
}
// The loop of each following iteration will become a merge. We need to remove
// its non-recursive input.
FOREACH_COPY_INDEX(i) {
COPY(loop_node, i)->RemoveInput(0);
NodeProperties::ChangeOp(COPY(loop_node, i),
common->Merge(loop_node->InputCount() - 1));
}
// 3b) Rewire phis and loop exits.
for (Node* use : loop_node->uses()) {
if (NodeProperties::IsPhi(use)) {
// Number of value inputs for a Phi, of effect inputs otherwise.
int count = use->opcode() == IrOpcode::kPhi
? use->op()->ValueInputCount()
: use->op()->EffectInputCount();
// Phis depending on the loop header should take their input from the
// previous iteration instead.
// Same shifting scheme as for the control inputs in 3a).
for (int input_index = 1; input_index < count; input_index++) {
Node* last_iteration_input =
COPY(use, unrolling_count - 1)->InputAt(input_index);
for (uint32_t copy_index = unrolling_count - 1; copy_index > 0;
copy_index--) {
COPY(use, copy_index)
->ReplaceInput(input_index,
COPY(use, copy_index - 1)->InputAt(input_index));
}
COPY(use, 0)->ReplaceInput(input_index, use->InputAt(input_index));
use->ReplaceInput(input_index, last_iteration_input);
}
// Phis in each following iteration should not depend on the
// (non-recursive) entry to the loop. Remove their first input.
FOREACH_COPY_INDEX(i) {
COPY(use, i)->RemoveInput(0);
NodeProperties::ChangeOp(
COPY(use, i), common->ResizeMergeOrPhi(use->op(), count - 1));
}
}
// Loop exits should point to the loop header.
// The copied exits get the original header as input 1.
if (use->opcode() == IrOpcode::kLoopExit) {
FOREACH_COPY_INDEX(i) { COPY(use, i)->ReplaceInput(1, loop_node); }
}
}
}
#undef COPY
#undef FOREACH_COPY_INDEX
} // namespace compiler
} // namespace internal
} // namespace v8
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_COMPILER_LOOP_UNROLLING_H_
#define V8_COMPILER_LOOP_UNROLLING_H_
// Loop unrolling is an optimization that copies the body of a loop and creates
// a fresh loop, whose iteration corresponds to 2 or more iterations of the
// initial loop. For a high-level description of the algorithm see
// docs.google.com/document/d/1AsUCqslMUB6fLdnGq0ZoPk2kn50jIJAWAL77lKXXP5g/
#include "src/compiler/common-operator.h"
#include "src/compiler/loop-analysis.h"
namespace v8 {
namespace internal {
namespace compiler {
// Unrolls a single loop by copying its nodes and rewiring the graph.
//   loop_node:        header of the loop; must be a Loop node.
//   loop:             the set of nodes that make up the loop, e.g. as
//                     computed by LoopFinder::FindUnnestedLoopFromHeader.
//   depth:            nesting depth of the loop, used by the heuristic that
//                     picks the number of copies.
//   graph, common:    used to create the new nodes and operators.
//   tmp_zone:         zone for temporary allocations made while unrolling.
//   source_positions,
//   node_origins:     passed on to the node copier.
// The graph is left unchanged if the header has no back edge or if the
// heuristic decides not to unroll.
void UnrollLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, uint32_t depth,
Graph* graph, CommonOperatorBuilder* common, Zone* tmp_zone,
SourcePositionTable* source_positions,
NodeOriginTable* node_origins);
} // namespace compiler
} // namespace internal
} // namespace v8
#endif // V8_COMPILER_LOOP_UNROLLING_H_
......@@ -56,6 +56,7 @@
#include "src/compiler/load-elimination.h"
#include "src/compiler/loop-analysis.h"
#include "src/compiler/loop-peeling.h"
#include "src/compiler/loop-unrolling.h"
#include "src/compiler/loop-variable-optimizer.h"
#include "src/compiler/machine-graph-verifier.h"
#include "src/compiler/machine-operator-reducer.h"
......@@ -1776,6 +1777,37 @@ struct LoopPeelingPhase {
}
};
struct WasmLoopUnrollingPhase {
DECL_PIPELINE_PHASE_CONSTANTS(WasmLoopUnrolling)
void Run(PipelineData* data, Zone* temp_zone,
std::vector<compiler::WasmLoopInfo>* loop_infos) {
for (WasmLoopInfo& loop_info : *loop_infos) {
if (loop_info.is_innermost) {
ZoneUnorderedSet<Node*>* loop =
LoopFinder::FindUnnestedLoopFromHeader(loop_info.header, temp_zone);
UnrollLoop(loop_info.header, loop, loop_info.nesting_depth,
data->graph(), data->common(), temp_zone,
data->source_positions(), data->node_origins());
}
}
for (WasmLoopInfo& loop_info : *loop_infos) {
std::unordered_set<Node*> loop_exits;
// We collect exits into a set first because we are not allowed to mutate
// them while iterating uses().
for (Node* use : loop_info.header->uses()) {
if (use->opcode() == IrOpcode::kLoopExit) {
loop_exits.insert(use);
}
}
for (Node* use : loop_exits) {
LoopPeeler::EliminateLoopExit(use);
}
}
}
};
struct LoopExitEliminationPhase {
DECL_PIPELINE_PHASE_CONSTANTS(LoopExitElimination)
......@@ -3209,7 +3241,7 @@ void Pipeline::GenerateCodeForWasmFunction(
MachineGraph* mcgraph, CallDescriptor* call_descriptor,
SourcePositionTable* source_positions, NodeOriginTable* node_origins,
wasm::FunctionBody function_body, const wasm::WasmModule* module,
int function_index) {
int function_index, std::vector<compiler::WasmLoopInfo>* loop_info) {
ZoneStats zone_stats(wasm_engine->allocator());
std::unique_ptr<PipelineStatistics> pipeline_statistics(
CreatePipelineStatistics(wasm_engine, function_body, module, info,
......@@ -3236,8 +3268,8 @@ void Pipeline::GenerateCodeForWasmFunction(
pipeline.RunPrintAndVerify("V8.WasmMachineCode", true);
if (FLAG_wasm_loop_unrolling) {
pipeline.Run<LoopExitEliminationPhase>();
pipeline.RunPrintAndVerify("V8.LoopExitEliminationPhase", true);
pipeline.Run<WasmLoopUnrollingPhase>(loop_info);
pipeline.RunPrintAndVerify("V8.WasmLoopUnrolling", true);
}
data.BeginPhaseKind("V8.WasmOptimization");
......
......@@ -41,6 +41,7 @@ class MachineGraph;
class NodeOriginTable;
class Schedule;
class SourcePositionTable;
struct WasmLoopInfo;
class Pipeline : public AllStatic {
public:
......@@ -57,7 +58,7 @@ class Pipeline : public AllStatic {
MachineGraph* mcgraph, CallDescriptor* call_descriptor,
SourcePositionTable* source_positions, NodeOriginTable* node_origins,
wasm::FunctionBody function_body, const wasm::WasmModule* module,
int function_index);
int function_index, std::vector<compiler::WasmLoopInfo>* loop_infos);
// Run the pipeline on a machine graph and generate code.
static wasm::WasmCompilationResult GenerateCodeForWasmNativeStub(
......
......@@ -7829,14 +7829,15 @@ bool BuildGraphForWasmFunction(AccountingAllocator* allocator,
const wasm::FunctionBody& func_body,
int func_index, wasm::WasmFeatures* detected,
MachineGraph* mcgraph,
std::vector<compiler::WasmLoopInfo>* loop_infos,
NodeOriginTable* node_origins,
SourcePositionTable* source_positions) {
// Create a TF graph during decoding.
WasmGraphBuilder builder(env, mcgraph->zone(), mcgraph, func_body.sig,
source_positions);
wasm::VoidResult graph_construction_result =
wasm::BuildTFGraph(allocator, env->enabled_features, env->module,
&builder, detected, func_body, node_origins);
wasm::VoidResult graph_construction_result = wasm::BuildTFGraph(
allocator, env->enabled_features, env->module, &builder, detected,
func_body, loop_infos, node_origins);
if (graph_construction_result.failed()) {
if (FLAG_trace_wasm_compiler) {
StdoutStream{} << "Compilation failed: "
......@@ -7943,9 +7944,12 @@ wasm::WasmCompilationResult ExecuteTurbofanWasmCompilation(
: nullptr;
SourcePositionTable* source_positions =
mcgraph->zone()->New<SourcePositionTable>(mcgraph->graph());
std::vector<WasmLoopInfo> loop_infos;
if (!BuildGraphForWasmFunction(wasm_engine->allocator(), env, func_body,
func_index, detected, mcgraph, node_origins,
source_positions)) {
func_index, detected, mcgraph, &loop_infos,
node_origins, source_positions)) {
return wasm::WasmCompilationResult{};
}
......@@ -7966,7 +7970,7 @@ wasm::WasmCompilationResult ExecuteTurbofanWasmCompilation(
Pipeline::GenerateCodeForWasmFunction(
&info, wasm_engine, mcgraph, call_descriptor, source_positions,
node_origins, func_body, env->module, func_index);
node_origins, func_body, env->module, func_index, &loop_infos);
if (counters) {
counters->wasm_compile_function_peak_memory_bytes()->AddSample(
......
......@@ -177,6 +177,17 @@ struct WasmInstanceCacheNodes {
Node* mem_mask;
};
struct WasmLoopInfo {
Node* header;
uint32_t nesting_depth;
bool is_innermost;
WasmLoopInfo(Node* header, uint32_t nesting_depth, bool is_innermost)
: header(header),
nesting_depth(nesting_depth),
is_innermost(is_innermost) {}
};
// Abstracts details of building TurboFan graph nodes for wasm to separate
// the wasm decoder from the internal details of TurboFan.
class WasmGraphBuilder {
......
......@@ -934,8 +934,7 @@ DEFINE_BOOL(wasm_math_intrinsics, true,
"intrinsify some Math imports into wasm")
DEFINE_BOOL(wasm_loop_unrolling, false,
"generate and then remove loop exits in wasm turbofan code "
"(placeholder for future loop unrolling feature)")
"enable loop unrolling for wasm functions (experimental)")
DEFINE_BOOL(wasm_trap_handler, true,
"use signal handlers to catch out of bounds memory access in wasm"
" (currently Linux x86_64 only)")
......
......@@ -931,6 +931,7 @@ class RuntimeCallTimer final {
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, LocateSpillSlots) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, LoopExitElimination) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, LoopPeeling) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmLoopUnrolling) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, MachineOperatorOptimization) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, MeetRegisterConstraints) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, MemoryOptimization) \
......
......@@ -178,6 +178,26 @@ class WasmGraphBuildingInterface {
ssa_env_->state = SsaEnv::kMerged;
TFNode* loop_node = builder_->Loop(control());
if (FLAG_wasm_loop_unrolling) {
uint32_t nesting_depth = 0;
for (uint32_t depth = 1; depth < decoder->control_depth(); depth++) {
if (decoder->control_at(depth)->is_loop()) {
nesting_depth++;
}
}
// If this loop is nested, the parent loop's is_innermost field needs to
// be false. If the last loop in loop_infos_ has less depth, it has to be
// the parent loop. If it does not, it means another loop has been found
// within the parent loop, and that loop will have set the parent's
// is_innermost to false, so we do not need to do anything.
if (nesting_depth > 0 &&
loop_infos_.back().nesting_depth < nesting_depth) {
loop_infos_.back().is_innermost = false;
}
loop_infos_.emplace_back(loop_node, nesting_depth, true);
}
builder_->SetControl(loop_node);
decoder->control_at(0)->loop_node = loop_node;
......@@ -732,7 +752,8 @@ class WasmGraphBuildingInterface {
TryInfo* target_try = decoder->control_at(depth)->try_info;
if (FLAG_wasm_loop_unrolling) {
StackValueVector stack_values;
BuildNestedLoopExits(decoder, depth, true, stack_values);
BuildNestedLoopExits(decoder, depth, true, stack_values,
&block->try_info->exception);
}
Goto(decoder, target_try->catch_env);
......@@ -1055,10 +1076,14 @@ class WasmGraphBuildingInterface {
to->node = from.node;
}
std::vector<compiler::WasmLoopInfo> loop_infos() { return loop_infos_; }
private:
SsaEnv* ssa_env_ = nullptr;
compiler::WasmGraphBuilder* builder_;
uint32_t current_catch_ = kNullCatch;
// Tracks loop data for loop unrolling.
std::vector<compiler::WasmLoopInfo> loop_infos_;
TFNode* effect() { return builder_->effect(); }
......@@ -1143,7 +1168,7 @@ class WasmGraphBuildingInterface {
if (FLAG_wasm_loop_unrolling) {
StackValueVector values;
BuildNestedLoopExits(decoder, control_depth_of_current_catch(decoder),
true, values);
true, values, &if_exception);
}
Goto(decoder, try_info->catch_env);
if (try_info->exception == nullptr) {
......@@ -1421,16 +1446,28 @@ class WasmGraphBuildingInterface {
void BuildNestedLoopExits(FullDecoder* decoder, uint32_t depth_limit,
bool wrap_exit_values,
StackValueVector& stack_values) {
StackValueVector& stack_values,
TFNode** exception_value = nullptr) {
DCHECK(FLAG_wasm_loop_unrolling);
Control* control = nullptr;
// We are only interested in exits from the innermost loop.
for (uint32_t i = 0; i < depth_limit; i++) {
Control* control = decoder->control_at(i);
if (!control->is_loop()) continue;
Control* c = decoder->control_at(i);
if (c->is_loop()) {
control = c;
break;
}
}
if (control != nullptr) {
BuildLoopExits(decoder, control);
for (Value& value : stack_values) {
value.node = builder_->LoopExitValue(
value.node, value.type.machine_representation());
}
if (exception_value != nullptr) {
*exception_value = builder_->LoopExitValue(
*exception_value, MachineRepresentation::kWord32);
}
if (wrap_exit_values) {
WrapLocalsAtLoopExit(decoder, control);
}
......@@ -1459,6 +1496,7 @@ DecodeResult BuildTFGraph(AccountingAllocator* allocator,
const WasmFeatures& enabled, const WasmModule* module,
compiler::WasmGraphBuilder* builder,
WasmFeatures* detected, const FunctionBody& body,
std::vector<compiler::WasmLoopInfo>* loop_infos,
compiler::NodeOriginTable* node_origins) {
Zone zone(allocator, ZONE_NAME);
WasmFullDecoder<Decoder::kFullValidation, WasmGraphBuildingInterface> decoder(
......@@ -1470,6 +1508,9 @@ DecodeResult BuildTFGraph(AccountingAllocator* allocator,
if (node_origins) {
builder->RemoveBytecodePositionDecorator();
}
if (FLAG_wasm_loop_unrolling) {
*loop_infos = decoder.interface().loop_infos();
}
return decoder.toResult(nullptr);
}
......
......@@ -15,6 +15,7 @@ namespace internal {
namespace compiler { // external declarations from compiler.
class NodeOriginTable;
class WasmGraphBuilder;
struct WasmLoopInfo;
} // namespace compiler
namespace wasm {
......@@ -27,6 +28,7 @@ V8_EXPORT_PRIVATE DecodeResult
BuildTFGraph(AccountingAllocator* allocator, const WasmFeatures& enabled,
const WasmModule* module, compiler::WasmGraphBuilder* builder,
WasmFeatures* detected, const FunctionBody& body,
std::vector<compiler::WasmLoopInfo>* loop_infos,
compiler::NodeOriginTable* node_origins);
} // namespace wasm
......
......@@ -359,16 +359,18 @@ void TestBuildingGraphWithBuilder(compiler::WasmGraphBuilder* builder,
const byte* start, const byte* end) {
WasmFeatures unused_detected_features;
FunctionBody body(sig, 0, start, end);
std::vector<compiler::WasmLoopInfo> loops;
DecodeResult result =
BuildTFGraph(zone->allocator(), WasmFeatures::All(), nullptr, builder,
&unused_detected_features, body, nullptr);
&unused_detected_features, body, &loops, nullptr);
if (result.failed()) {
#ifdef DEBUG
if (!FLAG_trace_wasm_decoder) {
// Retry the compilation with the tracing flag on, to help in debugging.
FLAG_trace_wasm_decoder = true;
result = BuildTFGraph(zone->allocator(), WasmFeatures::All(), nullptr,
builder, &unused_detected_features, body, nullptr);
result =
BuildTFGraph(zone->allocator(), WasmFeatures::All(), nullptr, builder,
&unused_detected_features, body, &loops, nullptr);
}
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment