Commit 197471fa, authored by Manos Koukoutos, committed by V8 LUCI CQ

[turboshaft][wasm] Implement some wasm requirements

- Add Turboshaft to the wasm pipeline (behind a flag).
- Add a few operators.
- Implement SimplifyLoopsPhase, which ensures each loop has at most
  two inputs.
- Remove the unneeded effect argument from
{FlagsContinuation::ForTrap}.

Bug: v8:12783
Change-Id: I03a3f8cf3af40fc75bf57cfbad973b754b13dd8c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3899126
Commit-Queue: Manos Koukoutos <manoskouk@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#83365}
parent 5dfa2195
......@@ -2881,6 +2881,8 @@ filegroup(
"src/compiler/turboshaft/representations.cc",
"src/compiler/turboshaft/representations.h",
"src/compiler/turboshaft/sidetable.h",
"src/compiler/turboshaft/simplify-tf-loops.cc",
"src/compiler/turboshaft/simplify-tf-loops.h",
"src/compiler/turboshaft/utils.h",
"src/compiler/turboshaft/value-numbering-assembler.h",
"src/compiler/type-cache.cc",
......
......@@ -2935,6 +2935,7 @@ v8_header_set("v8_internal_headers") {
"src/compiler/turboshaft/recreate-schedule.h",
"src/compiler/turboshaft/representations.h",
"src/compiler/turboshaft/sidetable.h",
"src/compiler/turboshaft/simplify-tf-loops.h",
"src/compiler/turboshaft/utils.h",
"src/compiler/turboshaft/value-numbering-assembler.h",
"src/compiler/type-cache.h",
......@@ -4231,6 +4232,7 @@ v8_source_set("v8_turboshaft") {
"src/compiler/turboshaft/optimization-phase.cc",
"src/compiler/turboshaft/recreate-schedule.cc",
"src/compiler/turboshaft/representations.cc",
"src/compiler/turboshaft/simplify-tf-loops.cc",
]
public_deps = [
......
......@@ -14,12 +14,9 @@
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/compiler-source-position-table.h"
#include "src/compiler/js-heap-broker.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/pipeline.h"
#include "src/compiler/schedule.h"
#include "src/compiler/state-values-utils.h"
#include "src/deoptimizer/deoptimizer.h"
#if V8_ENABLE_WEBASSEMBLY
#include "src/wasm/simd-shuffle.h"
......@@ -3137,14 +3134,12 @@ void InstructionSelector::VisitSelect(Node* node) {
}
void InstructionSelector::VisitTrapIf(Node* node, TrapId trap_id) {
FlagsContinuation cont =
FlagsContinuation::ForTrap(kNotEqual, trap_id, node->InputAt(1));
FlagsContinuation cont = FlagsContinuation::ForTrap(kNotEqual, trap_id);
VisitWordCompareZero(node, node->InputAt(0), &cont);
}
void InstructionSelector::VisitTrapUnless(Node* node, TrapId trap_id) {
FlagsContinuation cont =
FlagsContinuation::ForTrap(kEqual, trap_id, node->InputAt(1));
FlagsContinuation cont = FlagsContinuation::ForTrap(kEqual, trap_id);
VisitWordCompareZero(node, node->InputAt(0), &cont);
}
......
......@@ -8,7 +8,6 @@
#include <map>
#include "src/codegen/cpu-features.h"
#include "src/common/globals.h"
#include "src/compiler/backend/instruction-scheduler.h"
#include "src/compiler/backend/instruction.h"
#include "src/compiler/common-operator.h"
......@@ -78,9 +77,8 @@ class FlagsContinuation final {
}
// Creates a new flags continuation for a wasm trap.
static FlagsContinuation ForTrap(FlagsCondition condition, TrapId trap_id,
Node* result) {
return FlagsContinuation(condition, trap_id, result);
static FlagsContinuation ForTrap(FlagsCondition condition, TrapId trap_id) {
return FlagsContinuation(condition, trap_id);
}
static FlagsContinuation ForSelect(FlagsCondition condition, Node* result,
......@@ -218,13 +216,8 @@ class FlagsContinuation final {
DCHECK_NOT_NULL(result);
}
FlagsContinuation(FlagsCondition condition, TrapId trap_id, Node* result)
: mode_(kFlags_trap),
condition_(condition),
frame_state_or_result_(result),
trap_id_(trap_id) {
DCHECK_NOT_NULL(result);
}
FlagsContinuation(FlagsCondition condition, TrapId trap_id)
: mode_(kFlags_trap), condition_(condition), trap_id_(trap_id) {}
FlagsContinuation(FlagsCondition condition, Node* result, Node* true_value,
Node* false_value)
......
......@@ -85,6 +85,7 @@
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/optimization-phase.h"
#include "src/compiler/turboshaft/recreate-schedule.h"
#include "src/compiler/turboshaft/simplify-tf-loops.h"
#include "src/compiler/turboshaft/value-numbering-assembler.h"
#include "src/compiler/type-narrowing-reducer.h"
#include "src/compiler/typed-optimization.h"
......@@ -2104,6 +2105,19 @@ struct WasmGCOptimizationPhase {
}
};
struct SimplifyLoopsPhase {
DECL_PIPELINE_PHASE_CONSTANTS(SimplifyLoops)
void Run(PipelineData* data, Zone* temp_zone) {
GraphReducer graph_reducer(
temp_zone, data->graph(), &data->info()->tick_counter(), data->broker(),
data->jsgraph()->Dead(), data->observe_node_manager());
SimplifyTFLoops simplify_loops(&graph_reducer, data->mcgraph());
AddReducer(data, &graph_reducer, &simplify_loops);
graph_reducer.ReduceGraph();
}
};
struct WasmGCLoweringPhase {
DECL_PIPELINE_PHASE_CONSTANTS(WasmGCLowering)
......@@ -3384,10 +3398,12 @@ void Pipeline::GenerateCodeForWasmFunction(
pipeline.Run<MachineOperatorOptimizationPhase>();
pipeline.RunPrintAndVerify(MachineOperatorOptimizationPhase::phase_name(),
true);
if (!v8_flags.turboshaft_wasm) {
pipeline.Run<DecompressionOptimizationPhase>();
pipeline.RunPrintAndVerify(DecompressionOptimizationPhase::phase_name(),
true);
}
}
if (v8_flags.wasm_opt) {
pipeline.Run<BranchConditionDuplicationPhase>();
......@@ -3395,6 +3411,11 @@ void Pipeline::GenerateCodeForWasmFunction(
true);
}
if (v8_flags.turboshaft_wasm) {
pipeline.Run<SimplifyLoopsPhase>();
pipeline.RunPrintAndVerify(SimplifyLoopsPhase::phase_name(), true);
}
if (v8_flags.turbo_splitting && !is_asm_js) {
data.info()->set_splitting();
}
......@@ -3407,6 +3428,30 @@ void Pipeline::GenerateCodeForWasmFunction(
pipeline.ComputeScheduledGraph();
Linkage linkage(call_descriptor);
if (v8_flags.turboshaft_wasm) {
if (base::Optional<BailoutReason> bailout =
pipeline.Run<BuildTurboshaftPhase>()) {
pipeline.info()->AbortOptimization(*bailout);
data.EndPhaseKind();
info->SetWasmCompilationResult({});
return;
}
pipeline.Run<PrintTurboshaftGraphPhase>(BuildTurboshaftPhase::phase_name());
pipeline.Run<OptimizeTurboshaftPhase>();
pipeline.Run<PrintTurboshaftGraphPhase>(
OptimizeTurboshaftPhase::phase_name());
pipeline.Run<DecompressionOptimizationPhase>();
pipeline.Run<PrintTurboshaftGraphPhase>(
DecompressionOptimizationPhase::phase_name());
pipeline.Run<TurboshaftRecreateSchedulePhase>(&linkage);
TraceSchedule(data.info(), &data, data.schedule(),
TurboshaftRecreateSchedulePhase::phase_name());
}
if (!pipeline.SelectInstructions(&linkage)) return;
pipeline.AssembleCode(&linkage);
......
......@@ -11,16 +11,13 @@
#include <memory>
#include <type_traits>
#include "src/base/iterator.h"
#include "src/base/logging.h"
#include "src/base/macros.h"
#include "src/base/small-vector.h"
#include "src/base/template-utils.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/source-position.h"
#include "src/codegen/reloc-info.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/operations.h"
#include "src/zone/zone-containers.h"
namespace v8::internal::compiler::turboshaft {
......@@ -359,9 +356,29 @@ class AssemblerInterface : public Superclass {
WordRepresentation::Word32())
DECL_SINGLE_REP_UNARY(Word64CountLeadingZeros, WordUnary, CountLeadingZeros,
WordRepresentation::Word64())
DECL_MULTI_REP_UNARY(WordCountTrailingZeros, WordUnary, WordRepresentation,
CountTrailingZeros)
DECL_SINGLE_REP_UNARY(Word32CountTrailingZeros, WordUnary, CountTrailingZeros,
WordRepresentation::Word32())
DECL_SINGLE_REP_UNARY(Word64CountTrailingZeros, WordUnary, CountTrailingZeros,
WordRepresentation::Word64())
DECL_MULTI_REP_UNARY(WordPopCount, WordUnary, WordRepresentation, PopCount)
DECL_SINGLE_REP_UNARY(Word32PopCount, WordUnary, PopCount,
WordRepresentation::Word32())
DECL_SINGLE_REP_UNARY(Word64PopCount, WordUnary, PopCount,
WordRepresentation::Word64())
#undef DECL_SINGLE_REP_UNARY
#undef DECL_MULTI_REP_UNARY
// Forwards to the subclass's Select with the Word32 representation.
OpIndex Word32Select(OpIndex condition, OpIndex left, OpIndex right) {
  const WordRepresentation rep = WordRepresentation::Word32();
  return subclass().Select(condition, left, right, rep);
}
// Forwards to the subclass's Select with the Word64 representation.
OpIndex Word64Select(OpIndex condition, OpIndex left, OpIndex right) {
  const WordRepresentation rep = WordRepresentation::Word64();
  return subclass().Select(condition, left, right, rep);
}
OpIndex Word32Constant(uint32_t value) {
return subclass().Constant(ConstantOp::Kind::kWord32, uint64_t{value});
}
......@@ -413,6 +430,13 @@ class AssemblerInterface : public Superclass {
OpIndex ExternalConstant(ExternalReference value) {
return subclass().Constant(ConstantOp::Kind::kExternal, value);
}
// Emits a relocatable constant. `mode` must be RelocInfo::WASM_CALL or
// RelocInfo::WASM_STUB_CALL (checked in debug builds); it selects the
// corresponding ConstantOp kind. `value` is stored as an unsigned 64-bit
// integer.
OpIndex RelocatableConstant(int64_t value, RelocInfo::Mode mode) {
  DCHECK_EQ(mode, any_of(RelocInfo::WASM_CALL, RelocInfo::WASM_STUB_CALL));
  const ConstantOp::Kind constant_kind =
      mode == RelocInfo::WASM_CALL
          ? ConstantOp::Kind::kRelocatableWasmCall
          : ConstantOp::Kind::kRelocatableWasmStubCall;
  return subclass().Constant(constant_kind, static_cast<uint64_t>(value));
}
#define DECL_CHANGE(name, kind, from, to) \
OpIndex name(OpIndex input) { \
......
......@@ -5,6 +5,7 @@
#ifndef V8_COMPILER_TURBOSHAFT_DEOPT_DATA_H_
#define V8_COMPILER_TURBOSHAFT_DEOPT_DATA_H_
#include "src/base/small-vector.h"
#include "src/common/globals.h"
#include "src/compiler/turboshaft/operations.h"
......
......@@ -19,7 +19,6 @@
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-aux-data.h"
#include "src/compiler/node-origin-table.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/opcodes.h"
#include "src/compiler/operator.h"
#include "src/compiler/schedule.h"
......@@ -202,6 +201,7 @@ base::Optional<BailoutReason> GraphBuilder::Run() {
case BasicBlock::kReturn:
case BasicBlock::kDeoptimize:
case BasicBlock::kThrow:
case BasicBlock::kTailCall:
break;
case BasicBlock::kCall: {
Node* call = block->control_input();
......@@ -216,8 +216,6 @@ base::Optional<BailoutReason> GraphBuilder::Run() {
op_mapping.Set(if_exception_node, catch_exception);
break;
}
case BasicBlock::kTailCall:
UNIMPLEMENTED();
case BasicBlock::kNone:
UNREACHABLE();
}
......@@ -323,7 +321,10 @@ OpIndex GraphBuilder::Process(
return assembler.CompressedHeapConstant(HeapConstantOf(op));
case IrOpcode::kExternalConstant:
return assembler.ExternalConstant(OpParameter<ExternalReference>(op));
case IrOpcode::kRelocatableInt64Constant:
return assembler.RelocatableConstant(
OpParameter<RelocatablePtrConstantInfo>(op).value(),
OpParameter<RelocatablePtrConstantInfo>(op).rmode());
#define BINOP_CASE(opcode, assembler_op) \
case IrOpcode::k##opcode: \
return assembler.assembler_op(Map(node->InputAt(0)), Map(node->InputAt(1)));
......@@ -431,6 +432,10 @@ OpIndex GraphBuilder::Process(
UNARY_CASE(Word64ReverseBytes, Word64ReverseBytes)
UNARY_CASE(Word32Clz, Word32CountLeadingZeros)
UNARY_CASE(Word64Clz, Word64CountLeadingZeros)
UNARY_CASE(Word32Ctz, Word32CountTrailingZeros)
UNARY_CASE(Word64Ctz, Word64CountTrailingZeros)
UNARY_CASE(Word32Popcnt, Word32PopCount)
UNARY_CASE(Word64Popcnt, Word64PopCount)
UNARY_CASE(Float32Abs, Float32Abs)
UNARY_CASE(Float64Abs, Float64Abs)
......@@ -481,9 +486,17 @@ OpIndex GraphBuilder::Process(
CHANGE_CASE(BitcastInt64ToFloat64, Bitcast, Word64, Float64)
CHANGE_CASE(ChangeUint32ToUint64, ZeroExtend, Word32, Word64)
CHANGE_CASE(ChangeInt32ToInt64, SignExtend, Word32, Word64)
CHANGE_CASE(ChangeInt32ToFloat64, SignedToFloat, Word32, Float64)
CHANGE_CASE(ChangeInt64ToFloat64, SignedToFloat, Word64, Float64)
CHANGE_CASE(SignExtendWord32ToInt64, SignExtend, Word32, Word64)
CHANGE_CASE(ChangeInt32ToFloat64, SignedNarrowing, Word32, Float64)
CHANGE_CASE(ChangeInt64ToFloat64, SignedNarrowing, Word64, Float64)
CHANGE_CASE(ChangeUint32ToFloat64, UnsignedToFloat, Word32, Float64)
CHANGE_CASE(RoundInt64ToFloat64, SignedToFloat, Word64, Float64)
CHANGE_CASE(RoundUint64ToFloat64, UnsignedToFloat, Word64, Float64)
CHANGE_CASE(RoundInt32ToFloat32, SignedToFloat, Word32, Float32)
CHANGE_CASE(RoundUint32ToFloat32, UnsignedToFloat, Word32, Float32)
CHANGE_CASE(RoundInt64ToFloat32, SignedToFloat, Word64, Float32)
CHANGE_CASE(RoundUint64ToFloat32, UnsignedToFloat, Word64, Float32)
CHANGE_CASE(TruncateFloat64ToWord32, JSFloatTruncate, Float64, Word32)
CHANGE_CASE(TruncateFloat64ToFloat32, FloatConversion, Float64, Float32)
CHANGE_CASE(ChangeFloat32ToFloat64, FloatConversion, Float32, Float64)
......@@ -498,21 +511,36 @@ OpIndex GraphBuilder::Process(
case IrOpcode::kTruncateInt64ToInt32:
// 64- to 32-bit truncation is implicit in Turboshaft.
return Map(node->InputAt(0));
case IrOpcode::kTruncateFloat64ToInt64: {
ChangeOp::Kind kind;
switch (OpParameter<TruncateKind>(op)) {
case TruncateKind::kArchitectureDefault:
kind = ChangeOp::Kind::kSignedFloatTruncate;
break;
case TruncateKind::kSetOverflowToMin:
kind = ChangeOp::Kind::kSignedFloatTruncateOverflowToMin;
break;
case IrOpcode::kTruncateFloat32ToInt32: {
ChangeOp::Kind kind =
OpParameter<TruncateKind>(node->op()) ==
TruncateKind::kArchitectureDefault
? ChangeOp::Kind::kSignedFloatTruncate
: ChangeOp::Kind::kSignedFloatTruncateOverflowToMin;
return assembler.Change(Map(node->InputAt(0)), kind,
RegisterRepresentation::Float32(),
RegisterRepresentation::Word32());
}
case IrOpcode::kTruncateFloat32ToUint32: {
ChangeOp::Kind kind =
OpParameter<TruncateKind>(node->op()) ==
TruncateKind::kArchitectureDefault
? ChangeOp::Kind::kUnsignedFloatTruncate
: ChangeOp::Kind::kUnsignedFloatTruncateOverflowToMin;
return assembler.Change(Map(node->InputAt(0)), kind,
RegisterRepresentation::Float32(),
RegisterRepresentation::Word32());
}
case IrOpcode::kTruncateFloat64ToInt64: {
ChangeOp::Kind kind =
OpParameter<TruncateKind>(node->op()) ==
TruncateKind::kArchitectureDefault
? ChangeOp::Kind::kSignedFloatTruncate
: ChangeOp::Kind::kSignedFloatTruncateOverflowToMin;
return assembler.Change(Map(node->InputAt(0)), kind,
RegisterRepresentation::Float64(),
RegisterRepresentation::Word64());
}
case IrOpcode::kFloat64InsertLowWord32:
return assembler.Float64InsertWord32(
Map(node->InputAt(0)), Map(node->InputAt(1)),
......@@ -531,16 +559,27 @@ OpIndex GraphBuilder::Process(
RegisterRepresentation::PointerSized(),
RegisterRepresentation::Tagged());
case IrOpcode::kWord32Select:
return assembler.Word32Select(
Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)));
case IrOpcode::kWord64Select:
return assembler.Word64Select(
Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)));
case IrOpcode::kLoad:
case IrOpcode::kLoadImmutable:
case IrOpcode::kProtectedLoad:
case IrOpcode::kUnalignedLoad: {
MemoryRepresentation loaded_rep =
MemoryRepresentation::FromMachineType(LoadRepresentationOf(op));
RegisterRepresentation result_rep = loaded_rep.ToRegisterRepresentation();
Node* base = node->InputAt(0);
Node* index = node->InputAt(1);
LoadOp::Kind kind = opcode == IrOpcode::kLoad
? LoadOp::Kind::kRawAligned
: LoadOp::Kind::kRawUnaligned;
// It's ok to merge LoadImmutable into Load after scheduling.
LoadOp::Kind kind =
opcode == IrOpcode::kUnalignedLoad ? LoadOp::Kind::kRawUnaligned
: opcode == IrOpcode::kProtectedLoad ? LoadOp::Kind::kProtected
: LoadOp::Kind::kRawAligned;
if (index->opcode() == IrOpcode::kInt32Constant) {
int32_t offset = OpParameter<int32_t>(index->op());
return assembler.Load(Map(base), kind, loaded_rep, result_rep, offset);
......@@ -559,14 +598,17 @@ OpIndex GraphBuilder::Process(
}
case IrOpcode::kStore:
case IrOpcode::kUnalignedStore: {
bool aligned = opcode == IrOpcode::kStore;
case IrOpcode::kUnalignedStore:
case IrOpcode::kProtectedStore: {
bool aligned = opcode != IrOpcode::kUnalignedStore;
StoreRepresentation store_rep =
aligned ? StoreRepresentationOf(op)
: StoreRepresentation(UnalignedStoreRepresentationOf(op),
WriteBarrierKind::kNoWriteBarrier);
StoreOp::Kind kind =
aligned ? StoreOp::Kind::kRawAligned : StoreOp::Kind::kRawUnaligned;
// Map each TurboFan store opcode to its own Turboshaft store kind. An
// UnalignedStore must stay unaligned: labelling it kRawAligned would make
// the schedule builder re-emit it as a plain aligned machine Store.
opcode == IrOpcode::kStore ? StoreOp::Kind::kRawAligned
: opcode == IrOpcode::kUnalignedStore ? StoreOp::Kind::kRawUnaligned
: StoreOp::Kind::kProtected;
Node* base = node->InputAt(0);
Node* index = node->InputAt(1);
Node* value = node->InputAt(2);
......@@ -654,6 +696,21 @@ OpIndex GraphBuilder::Process(
return call;
}
case IrOpcode::kTailCall: {
auto call_descriptor = CallDescriptorOf(op);
base::SmallVector<OpIndex, 16> arguments;
// The input `0` is the callee, the following value inputs are the
// arguments. `CallDescriptor::InputCount()` counts the callee and
// arguments.
OpIndex callee = Map(node->InputAt(0));
for (int i = 1; i < static_cast<int>(call_descriptor->InputCount());
++i) {
arguments.emplace_back(Map(node->InputAt(i)));
}
return assembler.TailCall(callee, base::VectorOf(arguments),
call_descriptor);
}
case IrOpcode::kFrameState: {
FrameState frame_state{node};
FrameStateData::Builder builder;
......@@ -673,6 +730,13 @@ OpIndex GraphBuilder::Process(
&DeoptimizeParametersOf(op));
}
case IrOpcode::kTrapIf:
case IrOpcode::kTrapUnless: {
OpIndex condition = Map(node->InputAt(0));
bool negated = op->opcode() == IrOpcode::kTrapUnless;
return assembler.TrapIf(condition, negated, TrapIdOf(op));
}
case IrOpcode::kDeoptimize: {
OpIndex frame_state = Map(node->InputAt(0));
return assembler.Deoptimize(frame_state, &DeoptimizeParametersOf(op));
......
......@@ -8,9 +8,7 @@
#include <sstream>
#include "src/base/platform/mutex.h"
#include "src/base/platform/platform.h"
#include "src/codegen/machine-type.h"
#include "src/common/assert-scope.h"
#include "src/common/globals.h"
#include "src/compiler/backend/instruction-selector.h"
#include "src/compiler/frame-states.h"
......@@ -49,6 +47,10 @@ std::ostream& operator<<(std::ostream& os, WordUnaryOp::Kind kind) {
return os << "ReverseBytes";
case WordUnaryOp::Kind::kCountLeadingZeros:
return os << "CountLeadingZeros";
case WordUnaryOp::Kind::kCountTrailingZeros:
return os << "CountTrailingZeros";
case WordUnaryOp::Kind::kPopCount:
return os << "PopCount";
}
}
......@@ -143,6 +145,23 @@ bool FloatUnaryOp::IsSupported(Kind kind, FloatRepresentation rep) {
}
}
// static
// Reports whether the unary word operation `kind` is supported for `rep`.
// CountLeadingZeros and ReverseBytes are always reported as supported; for
// CountTrailingZeros and PopCount the answer comes from SupportedOperations,
// using the word32 query for Word32 and the word64 query otherwise.
bool WordUnaryOp::IsSupported(Kind kind, WordRepresentation rep) {
  const bool is_word32 = rep == WordRepresentation::Word32();
  switch (kind) {
    case Kind::kReverseBytes:
    case Kind::kCountLeadingZeros:
      return true;
    case Kind::kCountTrailingZeros:
      if (is_word32) return SupportedOperations::word32_ctz();
      return SupportedOperations::word64_ctz();
    case Kind::kPopCount:
      if (is_word32) return SupportedOperations::word32_popcnt();
      return SupportedOperations::word64_popcnt();
  }
}
std::ostream& operator<<(std::ostream& os, ShiftOp::Kind kind) {
switch (kind) {
case ShiftOp::Kind::kShiftRightArithmeticShiftOutZeros:
......@@ -181,12 +200,16 @@ std::ostream& operator<<(std::ostream& os, ChangeOp::Kind kind) {
return os << "UnsignedNarrowing";
case ChangeOp::Kind::kFloatConversion:
return os << "FloatConversion";
case ChangeOp::Kind::kSignedFloatTruncate:
return os << "SignedFloatTruncate";
case ChangeOp::Kind::kJSFloatTruncate:
return os << "JSFloatTruncate";
case ChangeOp::Kind::kSignedFloatTruncate:
return os << "SignedFloatTruncate";
case ChangeOp::Kind::kSignedFloatTruncateOverflowToMin:
return os << "SignedFloatTruncateOverflowToMin";
case ChangeOp::Kind::kUnsignedFloatTruncate:
return os << "UnsignedFloatTruncate";
case ChangeOp::Kind::kUnsignedFloatTruncateOverflowToMin:
return os << "UnsignedFloatTruncateOverflowToMin";
case ChangeOp::Kind::kSignedToFloat:
return os << "SignedToFloat";
case ChangeOp::Kind::kUnsignedToFloat:
......@@ -269,6 +292,14 @@ void ConstantOp::PrintOptions(std::ostream& os) const {
case Kind::kCompressedHeapObject:
os << "compressed heap object: " << handle();
break;
case Kind::kRelocatableWasmCall:
os << "relocatable wasm call: 0x"
<< reinterpret_cast<void*>(storage.integral);
break;
case Kind::kRelocatableWasmStubCall:
os << "relocatable wasm stub call: 0x"
<< reinterpret_cast<void*>(storage.integral);
break;
}
os << "]";
}
......@@ -485,8 +516,8 @@ std::ostream& operator<<(std::ostream& os, OpProperties opProperties) {
os << "Reading";
} else if (opProperties == OpProperties::Writing()) {
os << "Writing";
} else if (opProperties == OpProperties::CanDeopt()) {
os << "CanDeopt";
} else if (opProperties == OpProperties::CanAbort()) {
os << "CanAbort";
} else if (opProperties == OpProperties::AnySideEffects()) {
os << "AnySideEffects";
} else if (opProperties == OpProperties::BlockTerminator()) {
......
This diff is collapsed.
......@@ -402,6 +402,11 @@ struct OptimizationPhase<Analyzer, Assembler>::Impl {
auto arguments = MapToNewGraph<16>(op.arguments());
return assembler.Call(callee, base::VectorOf(arguments), op.descriptor);
}
// Re-emits a TailCallOp with its callee and arguments mapped to the new
// graph; the call descriptor is passed through unchanged.
OpIndex ReduceTailCall(const TailCallOp& op) {
  OpIndex new_callee = MapToNewGraph(op.callee());
  auto new_arguments = MapToNewGraph<16>(op.arguments());
  return assembler.TailCall(new_callee, base::VectorOf(new_arguments),
                            op.descriptor);
}
OpIndex ReduceReturn(const ReturnOp& op) {
// We very rarely have tuples longer than 4.
auto return_values = MapToNewGraph<4>(op.return_values());
......@@ -440,6 +445,11 @@ struct OptimizationPhase<Analyzer, Assembler>::Impl {
OpIndex ReduceTaggedBitcast(const TaggedBitcastOp& op) {
return assembler.TaggedBitcast(MapToNewGraph(op.input()), op.from, op.to);
}
// Re-emits a SelectOp with its three inputs mapped to the new graph; the
// representation is passed through unchanged.
OpIndex ReduceSelect(const SelectOp& op) {
  OpIndex new_condition = MapToNewGraph(op.condition());
  OpIndex new_left = MapToNewGraph(op.left());
  OpIndex new_right = MapToNewGraph(op.right());
  return assembler.Select(new_condition, new_left, new_right, op.rep);
}
OpIndex ReduceConstant(const ConstantOp& op) {
return assembler.Constant(op.kind, op.storage);
}
......@@ -493,6 +503,10 @@ struct OptimizationPhase<Analyzer, Assembler>::Impl {
MapToNewGraph(op.frame_state()), op.negated,
op.parameters);
}
// Re-emits a TrapIfOp with its condition mapped to the new graph; the
// `negated` flag and trap id are passed through unchanged.
OpIndex ReduceTrapIf(const TrapIfOp& op) {
  OpIndex new_condition = MapToNewGraph(op.condition());
  return assembler.TrapIf(new_condition, op.negated, op.trap_id);
}
OpIndex ReduceTuple(const TupleOp& op) {
return assembler.Tuple(base::VectorOf(MapToNewGraph<4>(op.inputs())));
}
......
......@@ -88,6 +88,13 @@ struct ScheduleBuilder {
return AddNode(machine.Is64() ? machine.Word64Shl() : machine.Word32Shl(),
{a, b});
}
// Adds a relocatable pointer-sized constant node: a RelocatableInt64Constant
// when `machine.Is64()` holds, otherwise a RelocatableInt32Constant whose
// value is narrowed with a checked cast.
Node* RelocatableIntPtrConstant(intptr_t value, RelocInfo::Mode mode) {
  const Operator* constant_op;
  if (machine.Is64()) {
    constant_op = common.RelocatableInt64Constant(value, mode);
  } else {
    constant_op = common.RelocatableInt32Constant(
        base::checked_cast<int32_t>(value), mode);
  }
  return AddNode(constant_op, {});
}
void ProcessOperation(const Operation& op);
#define DECL_PROCESS_OPERATION(Name) Node* ProcessOperation(const Name##Op& op);
TURBOSHAFT_OPERATION_LIST(DECL_PROCESS_OPERATION)
......@@ -363,6 +370,12 @@ Node* ScheduleBuilder::ProcessOperation(const WordUnaryOp& op) {
case WordUnaryOp::Kind::kCountLeadingZeros:
o = word64 ? machine.Word64Clz() : machine.Word32Clz();
break;
case WordUnaryOp::Kind::kCountTrailingZeros:
o = word64 ? machine.Word64Ctz().op() : machine.Word32Ctz().op();
break;
case WordUnaryOp::Kind::kPopCount:
o = word64 ? machine.Word64Popcnt().op() : machine.Word32Popcnt().op();
break;
}
return AddNode(o, {GetNode(op.input())});
}
......@@ -628,6 +641,22 @@ Node* ScheduleBuilder::ProcessOperation(const ChangeOp& op) {
UNIMPLEMENTED();
}
break;
case Kind::kUnsignedFloatTruncate:
if (op.from == FloatRepresentation::Float32() &&
op.to == WordRepresentation::Word32()) {
o = machine.TruncateFloat32ToUint32(TruncateKind::kArchitectureDefault);
} else {
UNIMPLEMENTED();
}
break;
case Kind::kUnsignedFloatTruncateOverflowToMin:
if (op.from == FloatRepresentation::Float32() &&
op.to == WordRepresentation::Word32()) {
o = machine.TruncateFloat32ToUint32(TruncateKind::kSetOverflowToMin);
} else {
UNIMPLEMENTED();
}
break;
case Kind::kJSFloatTruncate:
if (op.from == FloatRepresentation::Float64() &&
op.to == WordRepresentation::Word32()) {
......@@ -642,7 +671,16 @@ Node* ScheduleBuilder::ProcessOperation(const ChangeOp& op) {
o = machine.ChangeInt32ToFloat64();
} else if (op.from == WordRepresentation::Word64() &&
op.to == FloatRepresentation::Float64()) {
o = machine.ChangeInt64ToFloat64();
o = machine.RoundInt64ToFloat64();
} else if (op.from == WordRepresentation::Word32() &&
op.to == FloatRepresentation::Float32()) {
o = machine.RoundInt32ToFloat32();
} else if (op.from == WordRepresentation::Word64() &&
op.to == FloatRepresentation::Float32()) {
o = machine.RoundInt64ToFloat32();
} else if (op.from == WordRepresentation::Word32() &&
op.to == FloatRepresentation::Float64()) {
o = machine.ChangeInt32ToFloat64();
} else {
UNIMPLEMENTED();
}
......@@ -651,6 +689,15 @@ Node* ScheduleBuilder::ProcessOperation(const ChangeOp& op) {
if (op.from == WordRepresentation::Word32() &&
op.to == FloatRepresentation::Float64()) {
o = machine.ChangeUint32ToFloat64();
} else if (op.from == WordRepresentation::Word32() &&
op.to == FloatRepresentation::Float32()) {
o = machine.RoundUint32ToFloat32();
} else if (op.from == WordRepresentation::Word64() &&
op.to == FloatRepresentation::Float32()) {
o = machine.RoundUint64ToFloat32();
} else if (op.from == WordRepresentation::Word64() &&
op.to == FloatRepresentation::Float64()) {
o = machine.RoundUint64ToFloat64();
} else {
UNIMPLEMENTED();
}
......@@ -708,6 +755,12 @@ Node* ScheduleBuilder::ProcessOperation(const ChangeOp& op) {
} else if (op.from == FloatRepresentation::Float64() &&
op.to == WordRepresentation::Word32()) {
o = machine.ChangeFloat64ToInt32();
} else if (op.from == WordRepresentation::Word32() &&
op.to == FloatRepresentation::Float64()) {
o = machine.ChangeInt32ToFloat64();
} else if (op.from == WordRepresentation::Word64() &&
op.to == FloatRepresentation::Float64()) {
o = machine.ChangeInt64ToFloat64();
} else {
UNIMPLEMENTED();
}
......@@ -749,6 +802,14 @@ Node* ScheduleBuilder::ProcessOperation(const TaggedBitcastOp& op) {
}
return AddNode(o, {GetNode(op.input())});
}
// Lowers a SelectOp to a machine Word32Select (for the Word32
// representation) or Word64Select (otherwise) node with inputs
// {condition, left, right}.
Node* ScheduleBuilder::ProcessOperation(const SelectOp& op) {
  const bool is_word32 = op.rep == WordRepresentation::Word32();
  const Operator* select_op = is_word32 ? machine.Word32Select().op()
                                        : machine.Word64Select().op();
  Node* condition = GetNode(op.condition());
  Node* left = GetNode(op.left());
  Node* right = GetNode(op.right());
  return AddNode(select_op, {condition, left, right});
}
Node* ScheduleBuilder::ProcessOperation(const PendingLoopPhiOp& op) {
UNREACHABLE();
}
......@@ -779,6 +840,11 @@ Node* ScheduleBuilder::ProcessOperation(const ConstantOp& op) {
return AddNode(common.Float64Constant(op.float64()), {});
case ConstantOp::Kind::kFloat32:
return AddNode(common.Float32Constant(op.float32()), {});
case ConstantOp::Kind::kRelocatableWasmCall:
return RelocatableIntPtrConstant(op.integral(), RelocInfo::WASM_CALL);
case ConstantOp::Kind::kRelocatableWasmStubCall:
return RelocatableIntPtrConstant(op.integral(),
RelocInfo::WASM_STUB_CALL);
}
}
Node* ScheduleBuilder::ProcessOperation(const LoadOp& op) {
......@@ -788,9 +854,11 @@ Node* ScheduleBuilder::ProcessOperation(const LoadOp& op) {
offset -= kHeapObjectTag;
}
Node* base = GetNode(op.base());
return AddNode(IsAlignedAccess(op.kind)
return AddNode(op.kind == LoadOp::Kind::kRawAligned
? machine.Load(op.loaded_rep.ToMachineType())
: machine.UnalignedLoad(op.loaded_rep.ToMachineType()),
: op.kind == LoadOp::Kind::kRawUnaligned
? machine.UnalignedLoad(op.loaded_rep.ToMachineType())
: machine.ProtectedLoad(op.loaded_rep.ToMachineType()),
{base, IntPtrConstant(offset)});
}
Node* ScheduleBuilder::ProcessOperation(const IndexedLoadOp& op) {
......@@ -815,8 +883,10 @@ Node* ScheduleBuilder::ProcessOperation(const IndexedLoadOp& op) {
loaded_rep = MachineType::CompressedPointer();
}
}
return AddNode(IsAlignedAccess(op.kind) ? machine.Load(loaded_rep)
: machine.UnalignedLoad(loaded_rep),
return AddNode(op.kind == LoadOp::Kind::kRawAligned ? machine.Load(loaded_rep)
: op.kind == LoadOp::Kind::kRawUnaligned
? machine.UnalignedLoad(loaded_rep)
: machine.ProtectedLoad(loaded_rep),
{base, index});
}
Node* ScheduleBuilder::ProcessOperation(const StoreOp& op) {
......@@ -831,6 +901,9 @@ Node* ScheduleBuilder::ProcessOperation(const StoreOp& op) {
if (IsAlignedAccess(op.kind)) {
o = machine.Store(StoreRepresentation(
op.stored_rep.ToMachineType().representation(), op.write_barrier));
} else if (op.kind == LoadOp::Kind::kProtected) {
DCHECK_EQ(op.write_barrier, WriteBarrierKind::kNoWriteBarrier);
o = machine.ProtectedStore(op.stored_rep.ToMachineType().representation());
} else {
DCHECK_EQ(op.write_barrier, WriteBarrierKind::kNoWriteBarrier);
o = machine.UnalignedStore(op.stored_rep.ToMachineType().representation());
......@@ -856,6 +929,9 @@ Node* ScheduleBuilder::ProcessOperation(const IndexedStoreOp& op) {
if (IsAlignedAccess(op.kind)) {
o = machine.Store(StoreRepresentation(
op.stored_rep.ToMachineType().representation(), op.write_barrier));
} else if (op.kind == LoadOp::Kind::kProtected) {
DCHECK_EQ(op.write_barrier, WriteBarrierKind::kNoWriteBarrier);
o = machine.ProtectedStore(op.stored_rep.ToMachineType().representation());
} else {
DCHECK_EQ(op.write_barrier, WriteBarrierKind::kNoWriteBarrier);
o = machine.UnalignedStore(op.stored_rep.ToMachineType().representation());
......@@ -927,6 +1003,12 @@ Node* ScheduleBuilder::ProcessOperation(const DeoptimizeIfOp& op) {
op.parameters->feedback());
return AddNode(o, {condition, frame_state});
}
// Lowers a TrapIfOp to a TrapUnless node when `op.negated` is set and to a
// TrapIf node otherwise, with the condition as its only input.
Node* ScheduleBuilder::ProcessOperation(const TrapIfOp& op) {
  Node* condition = GetNode(op.condition());
  const Operator* trap_op;
  if (op.negated) {
    trap_op = common.TrapUnless(op.trap_id);
  } else {
    trap_op = common.TrapIf(op.trap_id);
  }
  return AddNode(trap_op, {condition});
}
Node* ScheduleBuilder::ProcessOperation(const DeoptimizeOp& op) {
Node* frame_state = GetNode(op.frame_state());
const Operator* o =
......@@ -1087,6 +1169,17 @@ Node* ScheduleBuilder::ProcessOperation(const CallOp& op) {
}
return AddNode(common.Call(op.descriptor), base::VectorOf(inputs));
}
// Lowers a TailCallOp to a TailCall node whose inputs are the callee
// followed by the arguments, and registers it on the current block via
// AddTailCall. Clears `current_block` afterwards and returns nullptr.
Node* ScheduleBuilder::ProcessOperation(const TailCallOp& op) {
  base::SmallVector<Node*, 16> call_inputs;
  call_inputs.push_back(GetNode(op.callee()));
  for (OpIndex argument : op.arguments()) {
    call_inputs.push_back(GetNode(argument));
  }
  Node* tail_call =
      MakeNode(common.TailCall(op.descriptor), base::VectorOf(call_inputs));
  schedule->AddTailCall(current_block, tail_call);
  current_block = nullptr;
  return nullptr;
}
Node* ScheduleBuilder::ProcessOperation(const UnreachableOp& op) {
Node* node = MakeNode(common.Throw(), {});
schedule->AddThrow(current_block, node);
......
......@@ -7,7 +7,7 @@
#include "src/compiler/compiler-source-position-table.h"
#include "src/compiler/node-origin-table.h"
#include "src/compiler/node.h"
namespace v8::internal {
class Zone;
}
......
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/turboshaft/simplify-tf-loops.h"
#include "src/base/small-vector.h"
#include "src/compiler/machine-graph.h"
#include "src/compiler/node-properties.h"
namespace v8::internal::compiler {
// Rewrites a Loop node that has more than two inputs into a new two-input
// Loop. The new Loop takes the original first input and the original node,
// which is turned into a Merge of its remaining inputs. Phi/EffectPhi uses of
// the original node are split the same way, and all other uses are redirected
// to the new Loop. Nodes that are not Loops, or Loops with at most two inputs,
// are left untouched.
// NOTE(review): this always returns NoChange(), even after mutating the
// graph in place — confirm this is the intended signal for the GraphReducer.
Reduction SimplifyTFLoops::Reduce(Node* node) {
if (node->opcode() != IrOpcode::kLoop) return NoChange();
if (node->InputCount() <= 2) return NoChange();
// The new loop has the old first input and the old node (about to become a
// Merge) as its two inputs.
Node* new_loop = mcgraph_->graph()->NewNode(mcgraph_->common()->Loop(2),
node->InputAt(0), node);
// Drop the first input from the old node and turn it into a Merge of the
// remaining inputs.
node->RemoveInput(0);
NodeProperties::ChangeOp(node, mcgraph_->common()->Merge(node->InputCount()));
// Non-phi uses are collected here and rewired after the phi pass below.
base::SmallVector<Edge, 4> control_uses;
for (Edge edge : node->use_edges()) {
Node* use = edge.from();
if (!NodeProperties::IsPhi(use)) {
control_uses.emplace_back(edge);
continue;
}
// Split the phi like the loop: its first input moves to a new two-input
// phi on the new loop, and the old phi keeps the remaining inputs.
Node* dominating_input = use->InputAt(0);
use->RemoveInput(0);
// InputCount() - 1 is passed as the new input count; presumably the
// excluded input is the phi's control input — TODO confirm.
NodeProperties::ChangeOp(
use, use->opcode() == IrOpcode::kPhi
? mcgraph_->common()->Phi(PhiRepresentationOf(use->op()),
use->InputCount() - 1)
: mcgraph_->common()->EffectPhi(use->InputCount() - 1));
Node* new_phi = mcgraph_->graph()->NewNode(
use->opcode() == IrOpcode::kPhi
? mcgraph_->common()->Phi(PhiRepresentationOf(use->op()), 2)
: mcgraph_->common()->EffectPhi(2),
dominating_input, use, new_loop);
ReplaceWithValue(use, new_phi, new_phi, new_phi);
// Restore the use <- new_phi edge we just broke.
new_phi->ReplaceInput(1, use);
}
// Point every collected non-phi use at the new loop, except the new loop
// itself, which must keep the old node as its input.
for (Edge edge : control_uses) {
if (edge.from() != new_loop) {
edge.from()->ReplaceInput(edge.index(), new_loop);
}
}
return NoChange();
}
} // namespace v8::internal::compiler
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_COMPILER_TURBOSHAFT_SIMPLIFY_TF_LOOPS_H_
#define V8_COMPILER_TURBOSHAFT_SIMPLIFY_TF_LOOPS_H_
#include "src/compiler/graph-reducer.h"
namespace v8::internal::compiler {
class MachineGraph;
// Graph reducer that limits every loop node to at most two inputs,
// introducing additional merge nodes where a loop has more.
class SimplifyTFLoops final : public AdvancedReducer {
 public:
  // `editor` is handed to the AdvancedReducer base; `mcgraph` is stored for
  // later use by Reduce().
  SimplifyTFLoops(Editor* editor, MachineGraph* mcgraph)
      : AdvancedReducer(editor), mcgraph_(mcgraph) {}

  // Name under which this reducer identifies itself.
  const char* reducer_name() const override { return "SimplifyTFLoops"; }

  // Reducer entry point, called with one node at a time.
  Reduction Reduce(Node* node) final;

 private:
  MachineGraph* const mcgraph_;
};
} // namespace v8::internal::compiler
#endif // V8_COMPILER_TURBOSHAFT_SIMPLIFY_TF_LOOPS_H_
......@@ -963,10 +963,12 @@ DEFINE_FLOAT(script_delay_fraction, 0.0,
"busy wait after each Script::Run by the given fraction of the "
"run's duration")
DEFINE_BOOL(turboshaft, false, "enable TurboFan's Turboshaft phases")
DEFINE_BOOL(turboshaft, false, "enable TurboFan's Turboshaft phases for JS")
DEFINE_WEAK_IMPLICATION(future, turboshaft)
DEFINE_BOOL(turboshaft_trace_reduction, false,
"trace individual Turboshaft reduction steps")
DEFINE_BOOL(turboshaft_wasm, false,
"enable TurboFan's Turboshaft phases for wasm")
// Favor memory over execution speed.
DEFINE_BOOL(optimize_for_size, false,
......
......@@ -366,6 +366,7 @@ class RuntimeCallTimer final {
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, Scheduling) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, SelectInstructions) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, SimplifiedLowering) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, SimplifyLoops) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, StoreStoreElimination) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, TraceScheduleAndVerify) \
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, BuildTurboshaft) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment