Commit ae97e885 authored by Santiago Aboy Solanes's avatar Santiago Aboy Solanes Committed by Commit Bot

[ptr-compr] Implement the DecompressionOptimizer Reducer and its phase

The DecompressionOptimizer aims to avoid adding the root in AnyTagged
or TaggedPointer loads. For the TaggedSigned case, we already solve it
in instruction selection.

The new phase will run only when pointer compression is enabled. For
the moment, it also requires FLAG_turbo_decompression_elimination to
be false. This latter flag is only temporary to test out the
implementation.

The phase needs to be run when Machine operators are present in the
graph, i.e. at the very end of the pipeline. Also, since this phase may
change the load's MachineRepresentation from Tagged to Compressed, it's
best to run it as late as possible in order to keep the phases that know
about Compressed MachineRepresentation to a minimum.

As an example, if we Load a Tagged value only to Store it back again
(i.e. Load -> Store nodes, with the Load being the Store's value) we
don't need to fully decompress it since the Store will ignore the
top bits.

Bug: v8:7703
Change-Id: I6b4aec203ab8cbb540b2513cabb1e2a5691ce938
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1859615
Commit-Queue: Santiago Aboy Solanes <solanes@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64358}
parent 07ee86a5
...@@ -1817,6 +1817,8 @@ v8_compiler_sources = [ ...@@ -1817,6 +1817,8 @@ v8_compiler_sources = [
"src/compiler/dead-code-elimination.h", "src/compiler/dead-code-elimination.h",
"src/compiler/decompression-elimination.cc", "src/compiler/decompression-elimination.cc",
"src/compiler/decompression-elimination.h", "src/compiler/decompression-elimination.h",
"src/compiler/decompression-optimizer.cc",
"src/compiler/decompression-optimizer.h",
"src/compiler/diamond.h", "src/compiler/diamond.h",
"src/compiler/effect-control-linearizer.cc", "src/compiler/effect-control-linearizer.cc",
"src/compiler/effect-control-linearizer.h", "src/compiler/effect-control-linearizer.h",
......
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/decompression-optimizer.h"
#include "src/compiler/graph.h"
#include "src/compiler/node-properties.h"
namespace v8 {
namespace internal {
namespace compiler {
namespace {
// Returns true iff |node| is one of the machine-level load operators
// (plain, poisoned, protected or unaligned).
bool IsMachineLoad(Node* const node) {
  switch (node->opcode()) {
    case IrOpcode::kLoad:
    case IrOpcode::kPoisonedLoad:
    case IrOpcode::kProtectedLoad:
    case IrOpcode::kUnalignedLoad:
      return true;
    default:
      return false;
  }
}
} // anonymous namespace
// Sets up an optimizer for |graph|. The work queue and the list of candidate
// loads are both allocated in |zone|; |machine| is used later to build the
// replacement load operators.
DecompressionOptimizer::DecompressionOptimizer(Zone* zone, Graph* graph,
                                               MachineOperatorBuilder* machine)
    : graph_(graph),
      machine_(machine),
      // The marker is sized by the number of distinct State values.
      states_(graph, static_cast<uint32_t>(State::kNumberOfStates)),
      to_visit_(zone),
      compressed_loads_(zone) {}
void DecompressionOptimizer::MarkNodes() {
MaybeMarkAndQueueForRevisit(graph()->end(), State::kOnly32BitsObserved);
while (!to_visit_.empty()) {
Node* const node = to_visit_.front();
to_visit_.pop_front();
MarkNodeInputs(node);
}
}
// Propagates observation states from |node| to each of its inputs, according
// to |node|'s opcode. Inputs whose state changes are queued so that their own
// inputs get (re)visited.
void DecompressionOptimizer::MarkNodeInputs(Node* node) {
  // Mark the value inputs.
  switch (node->opcode()) {
    case IrOpcode::kStore:           // Fall through.
    case IrOpcode::kProtectedStore:  // Fall through.
    case IrOpcode::kUnalignedStore: {
      DCHECK_EQ(node->op()->ValueInputCount(), 3);
      MaybeMarkAndQueueForRevisit(node->InputAt(0),
                                  State::kEverythingObserved);  // base pointer
      MaybeMarkAndQueueForRevisit(node->InputAt(1),
                                  State::kEverythingObserved);  // index
      // An UnalignedStore operator carries a bare MachineRepresentation as its
      // parameter, not a StoreRepresentation, so it needs its own accessor.
      // Reading it through StoreRepresentationOf() would reinterpret the
      // parameter as the wrong type.
      const MachineRepresentation representation =
          node->opcode() == IrOpcode::kUnalignedStore
              ? UnalignedStoreRepresentationOf(node->op())
              : StoreRepresentationOf(node->op()).representation();
      // TODO(v8:7703): When the implementation is done, check if this
      // condition is too restrictive. We only mark Tagged stores as 32 bits.
      MaybeMarkAndQueueForRevisit(node->InputAt(2),
                                  IsAnyTagged(representation)
                                      ? State::kOnly32BitsObserved
                                      : State::kEverythingObserved);  // value
      break;
    }
    default:
      // To be conservative, we assume that all value inputs need to be 64 bits
      // unless noted otherwise.
      for (int i = 0; i < node->op()->ValueInputCount(); ++i) {
        MaybeMarkAndQueueForRevisit(node->InputAt(i),
                                    State::kEverythingObserved);
      }
      break;
  }

  // We always mark the non-value input nodes as kOnly32BitsObserved so that
  // they will be visited. If they need to be kEverythingObserved, they will be
  // marked as such in a future pass.
  for (int i = node->op()->ValueInputCount(); i < node->InputCount(); ++i) {
    MaybeMarkAndQueueForRevisit(node->InputAt(i), State::kOnly32BitsObserved);
  }
}
// Records |state| for |node| and queues the node, but only when that adds
// information: either the node has never been marked, or it is being upgraded
// from kOnly32BitsObserved to kEverythingObserved. Tagged loads that get
// marked kOnly32BitsObserved are additionally remembered as compression
// candidates.
void DecompressionOptimizer::MaybeMarkAndQueueForRevisit(Node* const node,
                                                         State state) {
  DCHECK_NE(state, State::kUnvisited);

  const State old_state = states_.Get(node);
  const bool is_first_mark = old_state == State::kUnvisited;
  const bool is_upgrade = old_state == State::kOnly32BitsObserved &&
                          state == State::kEverythingObserved;
  // Nothing new to record: leave the node alone.
  if (!is_first_mark && !is_upgrade) return;

  states_.Set(node, state);
  to_visit_.push_back(node);

  // Only loads that may hold a tagged pointer and are, so far, observed in 32
  // bits are candidates. They are re-checked in ChangeLoads(), since the state
  // may still be upgraded afterwards.
  if (state != State::kOnly32BitsObserved) return;
  if (!IsMachineLoad(node)) return;
  if (CanBeTaggedPointer(LoadRepresentationOf(node->op()).representation())) {
    compressed_loads_.push_back(node);
  }
}
// Rewrites every recorded candidate load that is still only observed in 32
// bits so that it uses the matching Compressed machine type.
void DecompressionOptimizer::ChangeLoads() {
  for (Node* const load : compressed_loads_) {
    // compressed_loads_ holds every load that was at some point marked
    // State::kOnly32BitsObserved. Some of them may have been upgraded to
    // State::kEverythingObserved afterwards; skipping those here is cheaper
    // than erasing them from the vector at upgrade time.
    if (IsEverythingObserved(load)) continue;

    // Pick the Compressed counterpart of the load's current machine type.
    const LoadRepresentation tagged_rep = LoadRepresentationOf(load->op());
    const bool is_any_tagged = tagged_rep == MachineType::AnyTagged();
    if (!is_any_tagged) {
      DCHECK_EQ(tagged_rep, MachineType::TaggedPointer());
    }
    const LoadRepresentation compressed_rep =
        is_any_tagged ? MachineType::AnyCompressed()
                      : MachineType::CompressedPointer();

    // Swap in the same kind of load operator, now with the Compressed type.
    switch (load->opcode()) {
      case IrOpcode::kLoad:
        NodeProperties::ChangeOp(load, machine()->Load(compressed_rep));
        break;
      case IrOpcode::kPoisonedLoad:
        NodeProperties::ChangeOp(load, machine()->PoisonedLoad(compressed_rep));
        break;
      case IrOpcode::kProtectedLoad:
        NodeProperties::ChangeOp(load,
                                 machine()->ProtectedLoad(compressed_rep));
        break;
      case IrOpcode::kUnalignedLoad:
        NodeProperties::ChangeOp(load,
                                 machine()->UnalignedLoad(compressed_rep));
        break;
      default:
        UNREACHABLE();
    }
  }
}
// Entry point of the optimization: first classify every reachable node, then
// rewrite the loads that turned out not to need full decompression.
void DecompressionOptimizer::Reduce() {
  MarkNodes();    // Phase 1: assign a State to each node.
  ChangeLoads();  // Phase 2: switch qualifying loads to Compressed.
}
} // namespace compiler
} // namespace internal
} // namespace v8
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_COMPILER_DECOMPRESSION_OPTIMIZER_H_
#define V8_COMPILER_DECOMPRESSION_OPTIMIZER_H_
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-marker.h"
namespace v8 {
namespace internal {
namespace compiler {
// Forward declare.
class Graph;
// The purpose of DecompressionOptimizer is to avoid the full decompression on
// Loads whenever possible. Its scope is narrowed down to TaggedPointer and
// AnyTagged loads, since TaggedSigned always avoids full decompression.
//
// DecompressionOptimizer only runs when pointer compression is enabled. For
// the moment, it also requires FLAG_turbo_decompression_elimination to be
// disabled. That flag is only temporary, to test out the implementation.
//
// The phase needs to be run when Machine operators are present in the graph,
// i.e. at the very end of the pipeline. Also, since this phase may change a
// load's MachineRepresentation from Tagged to Compressed, it is best to run it
// as late as possible in order to keep the number of phases that know about
// the Compressed MachineRepresentation to a minimum.
//
// As an example, if we Load a Tagged value only to Store it back again (i.e.
// Load -> Store nodes, with the Load's value being the Store's value), we
// don't need to fully decompress it since the Store will ignore the top bits.
class V8_EXPORT_PRIVATE DecompressionOptimizer final {
 public:
  DecompressionOptimizer(Zone* zone, Graph* graph,
                         MachineOperatorBuilder* machine);
  ~DecompressionOptimizer() = default;

  // Assigns a State to the nodes of the graph, and then changes the Operator
  // of the loads that can avoid decompression.
  void Reduce();

 private:
  // Per-node state:
  // * kUnvisited: the node has yet to be visited.
  // * kOnly32BitsObserved: the node has been visited or is on to_visit_, and
  //   no node observing its upper bits has been found.
  // * kEverythingObserved: the node has been visited or is on to_visit_, and
  //   at least one node observing its upper bits has been found.
  // kNumberOfStates is not a state; it is the count used to size states_.
  enum class State : uint8_t {
    kUnvisited = 0,
    kOnly32BitsObserved,
    kEverythingObserved,
    kNumberOfStates
  };

  // Walks the graph and marks every node as appropriate. A node is visited at
  // most twice: once after being marked kOnly32BitsObserved, and once more if
  // it is later upgraded to kEverythingObserved.
  void MarkNodes();

  // Marks the inputs of |node| as appropriate for |node|'s opcode. Inputs
  // whose State changes are queued to be (re)visited.
  void MarkNodeInputs(Node* node);

  // Sets |node|'s State to |state|, but only if that is new information:
  //  * |node| was unvisited, or
  //  * |node| was kOnly32BitsObserved and is now found to need all 64 bits.
  // When the State changes, |node| is queued for (re)visit. TaggedPointer and
  // AnyTagged loads that are set to kOnly32BitsObserved are also recorded in
  // compressed_loads_.
  void MaybeMarkAndQueueForRevisit(Node* const node, State state);

  // Goes through the recorded loads and changes the operator of those that
  // can avoid the full decompression.
  void ChangeLoads();

  bool IsEverythingObserved(Node* const node) {
    return states_.Get(node) == State::kEverythingObserved;
  }

  Graph* graph() const { return graph_; }
  MachineOperatorBuilder* machine() const { return machine_; }

  Graph* const graph_;
  MachineOperatorBuilder* const machine_;

  // State of each node, as described on the State enum.
  NodeMarker<State> states_;

  // Worklist of nodes whose inputs still have to be marked. It is a Deque
  // that is used as a Queue; NodeDeque is chosen because it attempts to reuse
  // 'freed' zone memory instead of always allocating a new region.
  NodeDeque to_visit_;

  // The AnyTagged and TaggedPointer loads that may avoid the full
  // decompression. It effectively works as a NodeSet, since each node is
  // added at most once, but it is a Vector because insertion speed matters.
  NodeVector compressed_loads_;

  DISALLOW_COPY_AND_ASSIGN(DecompressionOptimizer);
};
} // namespace compiler
} // namespace internal
} // namespace v8
#endif // V8_COMPILER_DECOMPRESSION_OPTIMIZER_H_
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "src/compiler/csa-load-elimination.h" #include "src/compiler/csa-load-elimination.h"
#include "src/compiler/dead-code-elimination.h" #include "src/compiler/dead-code-elimination.h"
#include "src/compiler/decompression-elimination.h" #include "src/compiler/decompression-elimination.h"
#include "src/compiler/decompression-optimizer.h"
#include "src/compiler/effect-control-linearizer.h" #include "src/compiler/effect-control-linearizer.h"
#include "src/compiler/escape-analysis-reducer.h" #include "src/compiler/escape-analysis-reducer.h"
#include "src/compiler/escape-analysis.h" #include "src/compiler/escape-analysis.h"
...@@ -1793,6 +1794,18 @@ struct MachineOperatorOptimizationPhase { ...@@ -1793,6 +1794,18 @@ struct MachineOperatorOptimizationPhase {
} }
}; };
struct DecompressionOptimizationPhase {
static const char* phase_name() { return "V8.TFDecompressionOptimization"; }
void Run(PipelineData* data, Zone* temp_zone) {
if (COMPRESS_POINTERS_BOOL && !FLAG_turbo_decompression_elimination) {
DecompressionOptimizer decompression_optimizer(temp_zone, data->graph(),
data->machine());
decompression_optimizer.Reduce();
}
}
};
struct MidTierMachineLoweringPhase { struct MidTierMachineLoweringPhase {
static const char* phase_name() { return "V8.TFMidTierMachineLoweringPhase"; } static const char* phase_name() { return "V8.TFMidTierMachineLoweringPhase"; }
...@@ -2424,6 +2437,9 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) { ...@@ -2424,6 +2437,9 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
Run<MachineOperatorOptimizationPhase>(); Run<MachineOperatorOptimizationPhase>();
RunPrintAndVerify(MachineOperatorOptimizationPhase::phase_name(), true); RunPrintAndVerify(MachineOperatorOptimizationPhase::phase_name(), true);
Run<DecompressionOptimizationPhase>();
RunPrintAndVerify(DecompressionOptimizationPhase::phase_name(), true);
data->source_positions()->RemoveDecorator(); data->source_positions()->RemoveDecorator();
if (data->info()->trace_turbo_json_enabled()) { if (data->info()->trace_turbo_json_enabled()) {
data->node_origins()->RemoveDecorator(); data->node_origins()->RemoveDecorator();
......
...@@ -102,6 +102,7 @@ v8_source_set("unittests_sources") { ...@@ -102,6 +102,7 @@ v8_source_set("unittests_sources") {
"compiler/control-flow-optimizer-unittest.cc", "compiler/control-flow-optimizer-unittest.cc",
"compiler/dead-code-elimination-unittest.cc", "compiler/dead-code-elimination-unittest.cc",
"compiler/decompression-elimination-unittest.cc", "compiler/decompression-elimination-unittest.cc",
"compiler/decompression-optimizer-unittest.cc",
"compiler/diamond-unittest.cc", "compiler/diamond-unittest.cc",
"compiler/effect-control-linearizer-unittest.cc", "compiler/effect-control-linearizer-unittest.cc",
"compiler/graph-reducer-unittest.cc", "compiler/graph-reducer-unittest.cc",
......
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/decompression-optimizer.h"
#include "test/unittests/compiler/graph-unittest.h"
namespace v8 {
namespace internal {
namespace compiler {
class DecompressionOptimizerTest : public GraphTest {
public:
DecompressionOptimizerTest()
: GraphTest(),
machine_(zone(), MachineType::PointerRepresentation(),
MachineOperatorBuilder::kNoFlags) {}
~DecompressionOptimizerTest() override = default;
protected:
void Reduce() {
DecompressionOptimizer decompression_optimizer(zone(), graph(), machine());
decompression_optimizer.Reduce();
}
MachineRepresentation CompressedMachRep(MachineRepresentation mach_rep) {
if (mach_rep == MachineRepresentation::kTagged) {
return MachineRepresentation::kCompressed;
} else {
DCHECK_EQ(mach_rep, MachineRepresentation::kTaggedPointer);
return MachineRepresentation::kCompressedPointer;
}
}
MachineRepresentation CompressedMachRep(MachineType type) {
return CompressedMachRep(type.representation());
}
MachineRepresentation LoadMachRep(Node* node) {
return LoadRepresentationOf(node->op()).representation();
}
const MachineType types[2] = {MachineType::AnyTagged(),
MachineType::TaggedPointer()};
StoreRepresentation CreateStoreRep(MachineType type) {
return StoreRepresentation(type.representation(),
WriteBarrierKind::kFullWriteBarrier);
}
MachineOperatorBuilder* machine() { return &machine_; }
private:
MachineOperatorBuilder machine_;
};
// -----------------------------------------------------------------------------
// Direct Load into Store.
TEST_F(DecompressionOptimizerTest, DirectLoadStore) {
  // The optimizer only applies with pointer compression on and decompression
  // elimination off; in any other configuration there is nothing to test.
  if (!COMPRESS_POINTERS_BOOL || FLAG_turbo_decompression_elimination) {
    return;
  }

  // Nodes shared by every iteration.
  Node* const control = graph()->start();
  Node* object = Parameter(Type::Any(), 0);
  Node* effect = graph()->start();
  Node* index = Parameter(Type::UnsignedSmall(), 1);

  // Exercise both AnyTagged and TaggedPointer.
  for (const MachineType& type : types) {
    // Build: base_pointer = Load(object); value = Load(base_pointer);
    // Store(object, value). The store becomes the end of the graph.
    Node* base_pointer = graph()->NewNode(machine()->Load(type), object, index,
                                          effect, control);
    Node* value = graph()->NewNode(machine()->Load(type), base_pointer, index,
                                   effect, control);
    graph()->SetEnd(graph()->NewNode(machine()->Store(CreateStoreRep(type)),
                                     object, index, value, effect, control));

    // Run the optimizer and check the outcome: the load used as a base
    // pointer keeps its representation, while the load that only feeds the
    // store's value becomes Compressed.
    Reduce();
    EXPECT_EQ(LoadMachRep(base_pointer), type.representation());
    EXPECT_EQ(LoadMachRep(value), CompressedMachRep(type));
  }
}
} // namespace compiler
} // namespace internal
} // namespace v8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment