// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/basic-block-instrumentor.h"

#include <sstream>

#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/graph.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node.h"
#include "src/compiler/operator-properties.h"
#include "src/compiler/schedule.h"
#include "src/objects/objects-inl.h"

namespace v8 {
namespace internal {
namespace compiler {

// Find the first place to insert new nodes in a block that's already been
// scheduled that won't upset the register allocator.
static NodeVector::iterator FindInsertionPoint(BasicBlock* block) {
  NodeVector::iterator i = block->begin();
  for (; i != block->end(); ++i) {
    const Operator* op = (*i)->op();
    if (OperatorProperties::IsBasicBlockBegin(op)) continue;
    switch (op->opcode()) {
      case IrOpcode::kParameter:
      case IrOpcode::kPhi:
      case IrOpcode::kEffectPhi:
        continue;
    }
    break;
  }
  return i;
}

static const Operator* IntPtrConstant(CommonOperatorBuilder* common,
                                      intptr_t value) {
  return kSystemPointerSize == 8
             ? common->Int64Constant(value)
             : common->Int32Constant(static_cast<int32_t>(value));
}

// TODO(dcarney): need to mark code as non-serializable.
static const Operator* PointerConstant(CommonOperatorBuilder* common,
                                       const void* ptr) {
  intptr_t ptr_as_int = reinterpret_cast<intptr_t>(ptr);
  return IntPtrConstant(common, ptr_as_int);
}

BasicBlockProfilerData* BasicBlockInstrumentor::Instrument(
    OptimizedCompilationInfo* info, Graph* graph, Schedule* schedule,
    Isolate* isolate) {
  // Basic block profiling disables concurrent compilation, so handle deref is
  // fine.
  AllowHandleDereference allow_handle_dereference;
  // Skip the exit block in profiles, since the register allocator can't handle
  // it and entry into it means falling off the end of the function anyway.
  size_t n_blocks = schedule->RpoBlockCount() - 1;
  BasicBlockProfilerData* data = BasicBlockProfiler::Get()->NewData(n_blocks);
  // Set the function name.
  data->SetFunctionName(info->GetDebugName());
  // Capture the schedule string before instrumentation.
  if (FLAG_turbo_profiling_verbose) {
    std::ostringstream os;
    os << *schedule;
    data->SetSchedule(os);
  }
  // Check whether we should write counts to a JS heap object or to the
  // BasicBlockProfilerData directly. The JS heap object is only used for
  // builtins.
  bool on_heap_counters = isolate && isolate->IsGeneratingEmbeddedBuiltins();
  // Add the increment instructions to the start of every block.
  CommonOperatorBuilder common(graph->zone());
  MachineOperatorBuilder machine(graph->zone());
  Node* counters_array = nullptr;
  if (on_heap_counters) {
    // Allocation is disallowed here, so rather than referring to an actual
    // counters array, create a reference to a special marker object. This
    // object will get fixed up later in the constants table (see
    // PatchBasicBlockCountersReference). An important and subtle point: we
    // cannot use the root handle basic_block_counters_marker_handle() and must
    // create a new separate handle. Otherwise
    // TurboAssemblerBase::IndirectLoadConstant would helpfully emit a
    // root-relative load rather than putting this value in the constants table
    // where we expect it to be for patching.
    counters_array =
        graph->NewNode(common.HeapConstant(Handle<HeapObject>::New(
            ReadOnlyRoots(isolate).basic_block_counters_marker(), isolate)));
  } else {
    counters_array = graph->NewNode(PointerConstant(&common, data->counts()));
  }
  Node* zero = graph->NewNode(common.Int32Constant(0));
  Node* one = graph->NewNode(common.Int32Constant(1));
  BasicBlockVector* blocks = schedule->rpo_order();
  size_t block_number = 0;
  for (BasicBlockVector::iterator it = blocks->begin();
       block_number < n_blocks; ++it, ++block_number) {
    BasicBlock* block = (*it);
    // Iteration is already in reverse post-order.
    DCHECK_EQ(block->rpo_number(), block_number);
    data->SetBlockId(block_number, block->id().ToInt());
    // It is unnecessary to wire effect and control deps for load and store
    // since this happens after scheduling.
    // Construct increment operation.
    int offset_to_counter_value = static_cast<int>(block_number) * kInt32Size;
    if (on_heap_counters) {
      offset_to_counter_value += ByteArray::kHeaderSize - kHeapObjectTag;
    }
    Node* offset_to_counter =
        graph->NewNode(IntPtrConstant(&common, offset_to_counter_value));
    Node* load =
        graph->NewNode(machine.Load(MachineType::Uint32()), counters_array,
                       offset_to_counter, graph->start(), graph->start());
    Node* inc = graph->NewNode(machine.Int32Add(), load, one);
    // Branchless saturation, because we've already run the scheduler, so
    // introducing extra control flow here would be surprising.
    Node* overflow = graph->NewNode(machine.Uint32LessThan(), inc, load);
    Node* overflow_mask = graph->NewNode(machine.Int32Sub(), zero, overflow);
    Node* saturated_inc =
        graph->NewNode(machine.Word32Or(), inc, overflow_mask);
    Node* store =
        graph->NewNode(machine.Store(StoreRepresentation(
                           MachineRepresentation::kWord32, kNoWriteBarrier)),
                       counters_array, offset_to_counter, saturated_inc,
                       graph->start(), graph->start());
    // Insert the new nodes.
    static const int kArraySize = 10;
    Node* to_insert[kArraySize] = {
        counters_array, zero,     one,           offset_to_counter, load,
        inc,            overflow, overflow_mask, saturated_inc,     store};
    // The first three Nodes are constant across all blocks.
    int insertion_start = block_number == 0 ? 0 : 3;
    NodeVector::iterator insertion_point = FindInsertionPoint(block);
    block->InsertNodes(insertion_point, &to_insert[insertion_start],
                       &to_insert[kArraySize]);
    // Tell the scheduler about the new nodes.
    for (int i = insertion_start; i < kArraySize; ++i) {
      schedule->SetBlockForNode(block, to_insert[i]);
    }
  }
  return data;
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8
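
// A minimal sketch (not part of the V8 sources) of what the instrumentation
// above computes for each basic block, assuming a plain uint32_t counters
// array as in the off-heap case; the variable names are illustrative only:
//
//   uint32_t count = counters[block_number];
//   uint32_t inc = count + 1;
//   // If the increment wrapped around to 0, (inc < count) is 1, and
//   // 0u - 1u is 0xFFFFFFFF; OR-ing that mask in pins the counter at
//   // UINT32_MAX instead of letting it wrap, without any branch.
//   uint32_t mask = 0u - static_cast<uint32_t>(inc < count);
//   counters[block_number] = inc | mask;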