Commit 004ce08d authored by mlippautz, committed by Commit bot

Reland of "[heap] Parallel newspace evacuation, semispace copy, and compaction \o/"

This reverts commit 85ba94f2.

All parallelism can be turned off using --predictable or --noparallel-compaction.

This patch completely parallelizes
 - semispace copy: from space -> to space (within newspace)
 - newspace evacuation: newspace -> oldspace
 - oldspace compaction: oldspace -> oldspace

Previously, newspace was handled sequentially (semispace copy, newspace
evacuation) before oldspace was compacted in parallel. However, at a high level
there are no dependencies between those two actions, so we now parallelize them
altogether. We base the number of evacuation tasks on the overall set of
to-be-processed pages (newspace plus oldspace compaction pages).
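
A rough sketch of the task heuristic (the function and parameter names below
are illustrative; the actual NumberOfParallelCompactionTasks(pages, live_bytes)
in the diff also receives the live bytes of the candidate pages):

  #include <algorithm>

  // Illustrative only: one task per to-be-processed page (newspace plus
  // oldspace compaction pages), capped by the number of available cores.
  int EstimateEvacuationTasks(int newspace_pages, int oldspace_compaction_pages,
                              int available_cores) {
    const int pages = newspace_pages + oldspace_compaction_pages;
    if (pages == 0) return 1;  // The main thread always participates.
    return std::max(1, std::min(pages, available_cores));
  }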

Some low-level details:
 - The hard cap on the number of tasks has been lifted.
 - We cache store buffer entries locally before merging them back into the
   global StoreBuffer in a finalization phase (see the sketch after this list).
 - We cache AllocationSite operations locally before merging them back into the
   global pretenuring storage in a finalization phase.
 - AllocationSites might be compacted while they are still needed for newspace
   evacuation. To mitigate any problems, we defer checking allocation sites for
   newspace until the locally buffered data is merged.
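
The local buffering follows a simple record-then-merge pattern. A minimal
sketch, assuming a std::vector-backed buffer (the actual LocalStoreBuffer in
the diff uses a linked list of 16 KB chunks, and its Process() runs during
finalization, so the mutex here is only an assumption for concurrent merging):

  #include <mutex>
  #include <vector>

  using Address = unsigned char*;

  // Stand-in for the global store buffer; Mark() mirrors the method the real
  // StoreBuffer exposes in the diff below.
  struct GlobalStoreBufferSketch {
    std::vector<Address> entries;
    std::mutex mutex;  // Assumption: guards concurrent merges.
    void Mark(Address addr) { entries.push_back(addr); }
  };

  // Per-task buffer: Record() needs no locking because each evacuation task
  // owns its buffer; Process() merges everything back during finalization.
  class LocalBufferSketch {
   public:
    void Record(Address addr) { buffered_.push_back(addr); }
    void Process(GlobalStoreBufferSketch* global) {
      std::lock_guard<std::mutex> guard(global->mutex);
      for (Address addr : buffered_) global->Mark(addr);
      buffered_.clear();
    }

   private:
    std::vector<Address> buffered_;
  };

The same pattern applies to the AllocationSite feedback: tasks only record
addresses and counts locally, and all validity checks happen when the data is
merged on the main thread.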

CQ_EXTRA_TRYBOTS=tryserver.v8:v8_linux_arm64_gc_stress_dbg,v8_linux_gc_stress_dbg,v8_mac_gc_stress_dbg,v8_linux64_asan_rel,v8_linux64_tsan_rel,v8_mac64_asan_rel
BUG=chromium:524425
LOG=N
R=hpayer@chromium.org, ulan@chromium.org

Review URL: https://codereview.chromium.org/1640563004

Cr-Commit-Position: refs/heads/master@{#33552}
parent 997cd3d9
......@@ -1317,6 +1317,7 @@ source_set("v8_base") {
"src/unicode-cache.h",
"src/unicode-decoder.cc",
"src/unicode-decoder.h",
"src/utils-inl.h",
"src/utils.cc",
"src/utils.h",
"src/v8.cc",
......
......@@ -1058,6 +1058,14 @@ inline FunctionKind WithObjectLiteralBit(FunctionKind kind) {
DCHECK(IsValidFunctionKind(kind));
return kind;
}
inline uint32_t ObjectHash(Address address) {
// All objects are at least pointer aligned, so we can remove the trailing
// zeros.
return static_cast<uint32_t>(bit_cast<uintptr_t>(address) >>
kPointerSizeLog2);
}
} // namespace internal
} // namespace v8
......
......@@ -77,6 +77,7 @@ void ArrayBufferTracker::Unregister(JSArrayBuffer* buffer) {
void ArrayBufferTracker::MarkLive(JSArrayBuffer* buffer) {
base::LockGuard<base::Mutex> guard(&mutex_);
void* data = buffer->backing_store();
// ArrayBuffer might be in the middle of being constructed.
......@@ -123,6 +124,8 @@ void ArrayBufferTracker::PrepareDiscoveryInNewSpace() {
void ArrayBufferTracker::Promote(JSArrayBuffer* buffer) {
base::LockGuard<base::Mutex> guard(&mutex_);
if (buffer->is_external()) return;
void* data = buffer->backing_store();
if (!data) return;
......
......@@ -7,6 +7,7 @@
#include <map>
#include "src/base/platform/mutex.h"
#include "src/globals.h"
namespace v8 {
......@@ -47,6 +48,7 @@ class ArrayBufferTracker {
void Promote(JSArrayBuffer* buffer);
private:
base::Mutex mutex_;
Heap* heap_;
// |live_array_buffers_| maps externally allocated memory used as backing
......
......@@ -467,7 +467,7 @@ void Heap::MoveBlock(Address dst, Address src, int byte_size) {
}
}
template <Heap::FindMementoMode mode>
AllocationMemento* Heap::FindAllocationMemento(HeapObject* object) {
// Check if there is potentially a memento behind the object. If
// the last word of the memento is on another page we return
......@@ -476,34 +476,43 @@ AllocationMemento* Heap::FindAllocationMemento(HeapObject* object) {
Address memento_address = object_address + object->Size();
Address last_memento_word_address = memento_address + kPointerSize;
if (!NewSpacePage::OnSamePage(object_address, last_memento_word_address)) {
return NULL;
return nullptr;
}
HeapObject* candidate = HeapObject::FromAddress(memento_address);
Map* candidate_map = candidate->map();
// This fast check may peek at an uninitialized word. However, the slow check
// below (memento_address == top) ensures that this is safe. Mark the word as
// initialized to silence MemorySanitizer warnings.
MSAN_MEMORY_IS_INITIALIZED(&candidate_map, sizeof(candidate_map));
if (candidate_map != allocation_memento_map()) return NULL;
// Either the object is the last object in the new space, or there is another
// object of at least word size (the header map word) following it, so
// suffices to compare ptr and top here. Note that technically we do not have
// to compare with the current top pointer of the from space page during GC,
// since we always install filler objects above the top pointer of a from
// space page when performing a garbage collection. However, always performing
// the test makes it possible to have a single, unified version of
// FindAllocationMemento that is used both by the GC and the mutator.
Address top = NewSpaceTop();
DCHECK(memento_address == top ||
memento_address + HeapObject::kHeaderSize <= top ||
!NewSpacePage::OnSamePage(memento_address, top - 1));
if (memento_address == top) return NULL;
AllocationMemento* memento = AllocationMemento::cast(candidate);
if (!memento->IsValid()) return NULL;
return memento;
if (candidate_map != allocation_memento_map()) {
return nullptr;
}
AllocationMemento* memento_candidate = AllocationMemento::cast(candidate);
// Depending on what the memento is used for, we might need to perform
// additional checks.
Address top;
switch (mode) {
case Heap::kForGC:
return memento_candidate;
case Heap::kForRuntime:
if (memento_candidate == nullptr) return nullptr;
// Either the object is the last object in the new space, or there is
// another object of at least word size (the header map word) following
// it, so suffices to compare ptr and top here.
top = NewSpaceTop();
DCHECK(memento_address == top ||
memento_address + HeapObject::kHeaderSize <= top ||
!NewSpacePage::OnSamePage(memento_address, top - 1));
if ((memento_address != top) && memento_candidate->IsValid()) {
return memento_candidate;
}
return nullptr;
default:
UNREACHABLE();
}
UNREACHABLE();
return nullptr;
}
......@@ -513,24 +522,28 @@ void Heap::UpdateAllocationSite(HeapObject* object,
if (!FLAG_allocation_site_pretenuring ||
!AllocationSite::CanTrack(object->map()->instance_type()))
return;
AllocationMemento* memento = FindAllocationMemento(object);
if (memento == nullptr) return;
AllocationSite* key = memento->GetAllocationSite();
DCHECK(!key->IsZombie());
AllocationMemento* memento_candidate = FindAllocationMemento<kForGC>(object);
if (memento_candidate == nullptr) return;
if (pretenuring_feedback == global_pretenuring_feedback_) {
// Entering global pretenuring feedback is only used in the scavenger, where
// we are allowed to actually touch the allocation site.
if (!memento_candidate->IsValid()) return;
AllocationSite* site = memento_candidate->GetAllocationSite();
DCHECK(!site->IsZombie());
// For inserting in the global pretenuring storage we need to first
// increment the memento found count on the allocation site.
if (key->IncrementMementoFoundCount()) {
global_pretenuring_feedback_->LookupOrInsert(
key, static_cast<uint32_t>(bit_cast<uintptr_t>(key)));
if (site->IncrementMementoFoundCount()) {
global_pretenuring_feedback_->LookupOrInsert(site,
ObjectHash(site->address()));
}
} else {
// Any other pretenuring storage than the global one is used as a cache,
// where the count is later on merge in the allocation site.
HashMap::Entry* e = pretenuring_feedback->LookupOrInsert(
key, static_cast<uint32_t>(bit_cast<uintptr_t>(key)));
// Entering cached feedback is used in the parallel case. We are not allowed
// to dereference the allocation site and rather have to postpone all checks
// till actually merging the data.
Address key = memento_candidate->GetAllocationSiteUnchecked();
HashMap::Entry* e =
pretenuring_feedback->LookupOrInsert(key, ObjectHash(key));
DCHECK(e != nullptr);
(*bit_cast<intptr_t*>(&e->value))++;
}
......
......@@ -518,17 +518,19 @@ void Heap::MergeAllocationSitePretenuringFeedback(
if (map_word.IsForwardingAddress()) {
site = AllocationSite::cast(map_word.ToForwardingAddress());
}
DCHECK(site->IsAllocationSite());
// We have not validated the allocation site yet, since we have not
// dereferenced the site during collecting information.
// This is an inlined check of AllocationMemento::IsValid.
if (!site->IsAllocationSite() || site->IsZombie()) continue;
int value =
static_cast<int>(reinterpret_cast<intptr_t>(local_entry->value));
DCHECK_GT(value, 0);
{
// TODO(mlippautz): For parallel processing we need synchronization here.
if (site->IncrementMementoFoundCount(value)) {
global_pretenuring_feedback_->LookupOrInsert(
site, static_cast<uint32_t>(bit_cast<uintptr_t>(site)));
}
if (site->IncrementMementoFoundCount(value)) {
global_pretenuring_feedback_->LookupOrInsert(site,
ObjectHash(site->address()));
}
}
}
......@@ -566,22 +568,24 @@ void Heap::ProcessPretenuringFeedback() {
bool maximum_size_scavenge = MaximumSizeScavenge();
for (HashMap::Entry* e = global_pretenuring_feedback_->Start();
e != nullptr; e = global_pretenuring_feedback_->Next(e)) {
allocation_sites++;
site = reinterpret_cast<AllocationSite*>(e->key);
int found_count = site->memento_found_count();
// The fact that we have an entry in the storage means that we've found
// the site at least once.
DCHECK_GT(found_count, 0);
DCHECK(site->IsAllocationSite());
allocation_sites++;
active_allocation_sites++;
allocation_mementos_found += found_count;
if (site->DigestPretenuringFeedback(maximum_size_scavenge)) {
trigger_deoptimization = true;
}
if (site->GetPretenureMode() == TENURED) {
tenure_decisions++;
} else {
dont_tenure_decisions++;
// An entry in the storage does not imply that the count is > 0 because
// allocation sites might have been reset due to too many objects dying
// in old space.
if (found_count > 0) {
DCHECK(site->IsAllocationSite());
active_allocation_sites++;
allocation_mementos_found += found_count;
if (site->DigestPretenuringFeedback(maximum_size_scavenge)) {
trigger_deoptimization = true;
}
if (site->GetPretenureMode() == TENURED) {
tenure_decisions++;
} else {
dont_tenure_decisions++;
}
}
}
......
......@@ -450,6 +450,8 @@ class Heap {
enum PretenuringFeedbackInsertionMode { kCached, kGlobal };
enum FindMementoMode { kForRuntime, kForGC };
enum HeapState { NOT_IN_GC, SCAVENGE, MARK_COMPACT };
// Taking this lock prevents the GC from entering a phase that relocates
......@@ -739,6 +741,7 @@ class Heap {
// If an object has an AllocationMemento trailing it, return it, otherwise
// return NULL;
template <FindMementoMode mode>
inline AllocationMemento* FindAllocationMemento(HeapObject* object);
// Returns false if not able to reserve.
......@@ -1219,13 +1222,13 @@ class Heap {
void UpdateSurvivalStatistics(int start_new_space_size);
inline void IncrementPromotedObjectsSize(int object_size) {
inline void IncrementPromotedObjectsSize(intptr_t object_size) {
DCHECK_GE(object_size, 0);
promoted_objects_size_ += object_size;
}
inline intptr_t promoted_objects_size() { return promoted_objects_size_; }
inline void IncrementSemiSpaceCopiedObjectSize(int object_size) {
inline void IncrementSemiSpaceCopiedObjectSize(intptr_t object_size) {
DCHECK_GE(object_size, 0);
semi_space_copied_object_size_ += object_size;
}
......@@ -1243,8 +1246,8 @@ class Heap {
inline void IncrementNodesPromoted() { nodes_promoted_++; }
inline void IncrementYoungSurvivorsCounter(int survived) {
DCHECK(survived >= 0);
inline void IncrementYoungSurvivorsCounter(intptr_t survived) {
DCHECK_GE(survived, 0);
survived_last_scavenge_ = survived;
survived_since_last_expansion_ += survived;
}
......@@ -1993,10 +1996,10 @@ class Heap {
// For keeping track of how much data has survived
// scavenge since last new space expansion.
int survived_since_last_expansion_;
intptr_t survived_since_last_expansion_;
// ... and since the last scavenge.
int survived_last_scavenge_;
intptr_t survived_last_scavenge_;
// This is not the depth of nested AlwaysAllocateScope's but rather a single
// count, as scopes can be acquired from multiple tasks (read: threads).
......
......@@ -7,6 +7,7 @@
#include "src/base/bits.h"
#include "src/heap/spaces.h"
#include "src/heap/store-buffer.h"
namespace v8 {
namespace internal {
......@@ -406,7 +407,8 @@ class MarkCompactCollector {
void MigrateObject(HeapObject* dst, HeapObject* src, int size,
AllocationSpace to_old_space,
SlotsBuffer** evacuation_slots_buffer);
SlotsBuffer** evacuation_slots_buffer,
LocalStoreBuffer* local_store_buffer);
void InvalidateCode(Code* code);
......@@ -509,13 +511,12 @@ class MarkCompactCollector {
class EvacuateNewSpaceVisitor;
class EvacuateOldSpaceVisitor;
class EvacuateVisitorBase;
class Evacuator;
class HeapObjectVisitor;
class SweeperTask;
typedef std::vector<Page*> SweepingList;
static const int kInitialLocalPretenuringFeedbackCapacity = 256;
explicit MarkCompactCollector(Heap* heap);
bool WillBeDeoptimized(Code* code);
......@@ -704,25 +705,18 @@ class MarkCompactCollector {
void SweepSpaces();
void EvacuateNewSpacePrologue();
// Returns local pretenuring feedback.
HashMap* EvacuateNewSpaceInParallel();
void EvacuateNewSpaceEpilogue();
void AddEvacuationSlotsBufferSynchronized(
SlotsBuffer* evacuation_slots_buffer);
void EvacuatePages(CompactionSpaceCollection* compaction_spaces,
SlotsBuffer** evacuation_slots_buffer);
void EvacuatePagesInParallel();
// The number of parallel compaction tasks, including the main thread.
int NumberOfParallelCompactionTasks();
int NumberOfParallelCompactionTasks(int pages, intptr_t live_bytes);
void StartParallelCompaction(CompactionSpaceCollection** compaction_spaces,
uint32_t* task_ids, int len);
void WaitUntilCompactionCompleted(uint32_t* task_ids, int len);
void StartParallelCompaction(Evacuator** evacuators, int len);
void WaitUntilCompactionCompleted(Evacuator** evacuators, int len);
void EvacuateNewSpaceAndCandidates();
......@@ -751,7 +745,8 @@ class MarkCompactCollector {
// Updates store buffer and slot buffer for a pointer in a migrating object.
void RecordMigratedSlot(Object* value, Address slot,
SlotsBuffer** evacuation_slots_buffer);
SlotsBuffer** evacuation_slots_buffer,
LocalStoreBuffer* local_store_buffer);
// Adds the code entry slot to the slots buffer.
void RecordMigratedCodeEntrySlot(Address code_entry, Address code_entry_slot,
......@@ -777,8 +772,7 @@ class MarkCompactCollector {
bool have_code_to_deoptimize_;
List<Page*> evacuation_candidates_;
List<MemoryChunk*> newspace_evacuation_candidates_;
List<NewSpacePage*> newspace_evacuation_candidates_;
// The evacuation_slots_buffers_ are used by the compaction threads.
// When a compaction task finishes, it uses
......
......@@ -2917,9 +2917,7 @@ class CompactionSpaceCollection : public Malloced {
public:
explicit CompactionSpaceCollection(Heap* heap)
: old_space_(heap, OLD_SPACE, Executability::NOT_EXECUTABLE),
code_space_(heap, CODE_SPACE, Executability::EXECUTABLE),
duration_(0.0),
bytes_compacted_(0) {}
code_space_(heap, CODE_SPACE, Executability::EXECUTABLE) {}
CompactionSpace* Get(AllocationSpace space) {
switch (space) {
......@@ -2934,21 +2932,9 @@ class CompactionSpaceCollection : public Malloced {
return nullptr;
}
void ReportCompactionProgress(double duration, intptr_t bytes_compacted) {
duration_ += duration;
bytes_compacted_ += bytes_compacted;
}
double duration() const { return duration_; }
intptr_t bytes_compacted() const { return bytes_compacted_; }
private:
CompactionSpace old_space_;
CompactionSpace code_space_;
// Book keeping.
double duration_;
intptr_t bytes_compacted_;
};
......
......@@ -26,12 +26,6 @@ void StoreBuffer::Mark(Address addr) {
}
inline void StoreBuffer::MarkSynchronized(Address addr) {
base::LockGuard<base::Mutex> lock_guard(&mutex_);
Mark(addr);
}
void StoreBuffer::EnterDirectlyIntoStoreBuffer(Address addr) {
if (store_buffer_rebuilding_enabled_) {
SLOW_DCHECK(!heap_->code_space()->Contains(addr) &&
......@@ -48,6 +42,22 @@ void StoreBuffer::EnterDirectlyIntoStoreBuffer(Address addr) {
}
}
}
void LocalStoreBuffer::Record(Address addr) {
if (top_->is_full()) top_ = new Node(top_);
top_->buffer[top_->count++] = addr;
}
void LocalStoreBuffer::Process(StoreBuffer* store_buffer) {
Node* current = top_;
while (current != nullptr) {
for (int i = 0; i < current->count; i++) {
store_buffer->Mark(current->buffer[i]);
}
current = current->next;
}
}
} // namespace internal
} // namespace v8
......
......@@ -33,10 +33,6 @@ class StoreBuffer {
// This is used to add addresses to the store buffer non-concurrently.
inline void Mark(Address addr);
// This is used to add addresses to the store buffer when multiple threads
// may operate on the store buffer.
inline void MarkSynchronized(Address addr);
// This is used by the heap traversal to enter the addresses into the store
// buffer that should still be in the store buffer after GC. It enters
// addresses directly into the old buffer because the GC starts by wiping the
......@@ -216,6 +212,39 @@ class DontMoveStoreBufferEntriesScope {
StoreBuffer* store_buffer_;
bool stored_state_;
};
class LocalStoreBuffer BASE_EMBEDDED {
public:
LocalStoreBuffer() : top_(new Node(nullptr)) {}
~LocalStoreBuffer() {
Node* current = top_;
while (current != nullptr) {
Node* tmp = current->next;
delete current;
current = tmp;
}
}
inline void Record(Address addr);
inline void Process(StoreBuffer* store_buffer);
private:
static const int kBufferSize = 16 * KB;
struct Node : Malloced {
explicit Node(Node* next_node) : next(next_node), count(0) {}
inline bool is_full() { return count == kBufferSize; }
Node* next;
Address buffer[kBufferSize];
int count;
};
Node* top_;
};
} // namespace internal
} // namespace v8
......
......@@ -1855,6 +1855,9 @@ AllocationSite* AllocationMemento::GetAllocationSite() {
return AllocationSite::cast(allocation_site());
}
Address AllocationMemento::GetAllocationSiteUnchecked() {
return reinterpret_cast<Address>(allocation_site());
}
void JSObject::EnsureCanContainHeapObjectElements(Handle<JSObject> object) {
JSObject::ValidateElements(object);
......
......@@ -16077,7 +16077,8 @@ void JSObject::UpdateAllocationSite(Handle<JSObject> object,
{
DisallowHeapAllocation no_allocation;
AllocationMemento* memento = heap->FindAllocationMemento(*object);
AllocationMemento* memento =
heap->FindAllocationMemento<Heap::kForRuntime>(*object);
if (memento == NULL) return;
// Walk through to the Allocation Site
......
......@@ -8306,6 +8306,7 @@ class AllocationMemento: public Struct {
inline bool IsValid();
inline AllocationSite* GetAllocationSite();
inline Address GetAllocationSiteUnchecked();
DECLARE_PRINTER(AllocationMemento)
DECLARE_VERIFIER(AllocationMemento)
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_UTILS_INL_H_
#define V8_UTILS_INL_H_
#include "src/utils.h"
#include "include/v8-platform.h"
#include "src/base/platform/time.h"
#include "src/v8.h"
namespace v8 {
namespace internal {
class TimedScope {
public:
explicit TimedScope(double* result)
: start_(TimestampMs()), result_(result) {}
~TimedScope() { *result_ = TimestampMs() - start_; }
private:
static inline double TimestampMs() {
return V8::GetCurrentPlatform()->MonotonicallyIncreasingTime() *
static_cast<double>(base::Time::kMillisecondsPerSecond);
}
double start_;
double* result_;
};
} // namespace internal
} // namespace v8
#endif // V8_UTILS_INL_H_
......@@ -3514,6 +3514,13 @@ TEST(ReleaseOverReservedPages) {
// The optimizer can allocate stuff, messing up the test.
i::FLAG_crankshaft = false;
i::FLAG_always_opt = false;
// Parallel compaction increases fragmentation, depending on how existing
// memory is distributed. Since this is non-deterministic because of
// concurrent sweeping, we disable it for this test.
i::FLAG_parallel_compaction = false;
// Concurrent sweeping adds non-determinism, depending on when memory is
// available for further reuse.
i::FLAG_concurrent_sweeping = false;
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
Factory* factory = isolate->factory();
......
......@@ -1080,6 +1080,7 @@
'../../src/unicode-cache.h',
'../../src/unicode-decoder.cc',
'../../src/unicode-decoder.h',
'../../src/utils-inl.h',
'../../src/utils.cc',
'../../src/utils.h',
'../../src/v8.cc',
......