Commit a2b98b60 authored by Clemens Hammacher, committed by Commit Bot

Reland "[wasm] Test concurrent code emission"

This is a reland of 11524453

Original change's description:
> [wasm] Test concurrent code emission
> 
> This extends the jump table stress test. Currently, we generate
> different thunks (on the main thread) and then concurrently update the
> jump table to jump to one of these thunks.
> With this CL, we also generate the thunks concurrently. So this also
> tests whether there is proper synchronization between code generation
> and executing it in another thread.
> 
> R=ahaas@chromium.org, mstarzinger@chromium.org
> 
> Bug: v8:9477
> Change-Id: I3598329e37482ebd27a13acc752581c714226184
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1735319
> Reviewed-by: Andreas Haas <ahaas@chromium.org>
> Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#63097}

Bug: v8:9477
Change-Id: Iac696f1ff3cd5209231a8dd8d1500cf77c2777b8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1739370
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63138}
parent 7ef2e646
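
The following standalone sketch (not part of the CL; hypothetical names, plain C++ threads, and a std::atomic function pointer standing in for the real machine-code jump-table slot) illustrates the concurrency pattern the test below exercises: several runner threads keep calling through one slot while several patcher threads, serialized by a mutex, keep retargeting it. An atomic pointer store does not actually need the mutex or icache flushes; the real test does, because patching rewrites multiple instruction bytes non-atomically.

#include <atomic>
#include <mutex>
#include <thread>
#include <vector>

namespace {

int Target0() { return 0; }
int Target1() { return 1; }

// Stand-in for the jump-table slot; only models the control flow.
std::atomic<int (*)()> slot{Target0};
std::atomic<bool> stop{false};
std::mutex slot_mutex;  // Mirrors {jump_table_mutex} in the test.

void Runner() {
  while (!stop.load(std::memory_order_relaxed)) slot.load()();
}

void Patcher() {
  int (*thunks[2])() = {Target0, Target1};
  for (int i = 0; i < 64; ++i) {
    std::lock_guard<std::mutex> guard(slot_mutex);
    slot.store(thunks[i % 2]);
  }
}

}  // namespace

int main() {
  std::vector<std::thread> runners, patchers;
  for (int i = 0; i < 5; ++i) runners.emplace_back(Runner);    // cf. kNumberOfRunnerThreads
  for (int i = 0; i < 3; ++i) patchers.emplace_back(Patcher);  // cf. kNumberOfPatcherThreads
  for (auto& t : patchers) t.join();
  stop = true;
  for (auto& t : runners) t.join();
  return 0;
}
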
@@ -32,16 +32,18 @@ constexpr int kJumpTableSlotCount = 128;
 constexpr uint32_t kJumpTableSize =
     JumpTableAssembler::SizeForNumberOfSlots(kJumpTableSlotCount);
 
+constexpr size_t kThunkBufferSize = AssemblerBase::kMinimalBufferSize;
+
 #if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64
 constexpr uint32_t kAvailableBufferSlots =
-    (kMaxWasmCodeMemory - kJumpTableSize) / AssemblerBase::kMinimalBufferSize;
+    (kMaxWasmCodeMemory - kJumpTableSize) / kThunkBufferSize;
 constexpr uint32_t kBufferSlotStartOffset =
-    RoundUp<AssemblerBase::kMinimalBufferSize>(kJumpTableSize);
+    RoundUp<kThunkBufferSize>(kJumpTableSize);
 #else
 constexpr uint32_t kAvailableBufferSlots = 0;
 #endif
 
-Address GenerateJumpTableThunk(
+Address AllocateJumpTableThunk(
     Address jump_target, byte* thunk_slot_buffer,
     std::bitset<kAvailableBufferSlots>* used_slots,
     std::vector<std::unique_ptr<TestingAssemblerBuffer>>* thunk_buffers) {
@@ -62,20 +64,22 @@ Address GenerateJumpTableThunk(
     buffer_index = rng->NextInt(kAvailableBufferSlots);
   } while (used_slots->test(buffer_index));
   used_slots->set(buffer_index);
-  byte* buffer =
-      thunk_slot_buffer + buffer_index * AssemblerBase::kMinimalBufferSize;
+  return reinterpret_cast<Address>(thunk_slot_buffer +
+                                   buffer_index * kThunkBufferSize);
 #else
   USE(thunk_slot_buffer);
   USE(used_slots);
-  thunk_buffers->emplace_back(AllocateAssemblerBuffer(
-      AssemblerBase::kMinimalBufferSize, GetRandomMmapAddr()));
-  byte* buffer = thunk_buffers->back()->start();
+  thunk_buffers->emplace_back(
+      AllocateAssemblerBuffer(kThunkBufferSize, GetRandomMmapAddr()));
+  return reinterpret_cast<Address>(thunk_buffers->back()->start());
 #endif
+}
 
-  MacroAssembler masm(
-      nullptr, AssemblerOptions{}, CodeObjectRequired::kNo,
-      ExternalAssemblerBuffer(buffer, AssemblerBase::kMinimalBufferSize));
+void CompileJumpTableThunk(Address thunk, Address jump_target) {
+  MacroAssembler masm(nullptr, AssemblerOptions{}, CodeObjectRequired::kNo,
+                      ExternalAssemblerBuffer(reinterpret_cast<void*>(thunk),
+                                              kThunkBufferSize));
 
   Label exit;
   Register scratch = kReturnRegister0;
@@ -132,10 +136,9 @@ Address GenerateJumpTableThunk(
   __ bind(&exit);
   __ Ret();
 
-  CodeDesc desc;
-  masm.GetCode(nullptr, &desc);
-  FlushInstructionCache(buffer, desc.instr_size);
-  return reinterpret_cast<Address>(buffer);
+  FlushInstructionCache(thunk, kThunkBufferSize);
+  CHECK(SetPermissions(GetPlatformPageAllocator(), thunk, kThunkBufferSize,
+                       v8::PageAllocator::kReadExecute));
 }
 
 class JumpTableRunner : public v8::base::Thread {
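
CompileJumpTableThunk in the hunk above follows the usual sequence for emitting code that another thread may execute: write the instructions while the memory is writable, flush the instruction cache, then flip the pages to read+execute. A rough POSIX analogue (an illustrative assumption using mmap/mprotect and the GCC/Clang __builtin___clear_cache builtin, not V8's actual helpers):

#include <sys/mman.h>
#include <cstdlib>
#include <cstring>

// Emits a single x64 'ret' into a fresh page and makes it executable.
// On other architectures the instruction encoding would differ.
void* EmitReturnStub(size_t page_size) {
  void* buf = mmap(nullptr, page_size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (buf == MAP_FAILED) abort();
  static const unsigned char kRet[] = {0xC3};
  memcpy(buf, kRet, sizeof kRet);
  // No-op on x64; required on ARM64, where the data and instruction
  // caches are not coherent.
  __builtin___clear_cache(static_cast<char*>(buf),
                          static_cast<char*>(buf) + page_size);
  // W^X: only make the page executable once emission is complete.
  if (mprotect(buf, page_size, PROT_READ | PROT_EXEC) != 0) abort();
  // Callers may now do: reinterpret_cast<void (*)()>(buf)();
  return buf;
}
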
@@ -160,29 +163,38 @@ class JumpTableRunner : public v8::base::Thread {
 class JumpTablePatcher : public v8::base::Thread {
  public:
   JumpTablePatcher(Address slot_start, uint32_t slot_index, Address thunk1,
-                   Address thunk2)
+                   Address thunk2, base::Mutex* jump_table_mutex)
       : Thread(Options("JumpTablePatcher")),
         slot_start_(slot_start),
         slot_index_(slot_index),
-        thunks_{thunk1, thunk2} {}
+        thunks_{thunk1, thunk2},
+        jump_table_mutex_(jump_table_mutex) {}
 
   void Run() override {
-    TRACE("Patcher is starting ...\n");
+    TRACE("Patcher %p is starting ...\n", this);
+    Address slot_address =
+        slot_start_ + JumpTableAssembler::JumpSlotIndexToOffset(slot_index_);
+    // First, emit code to the two thunks.
+    for (Address thunk : thunks_) {
+      CompileJumpTableThunk(thunk, slot_address);
+    }
+    // Then, repeatedly patch the jump table to jump to one of the two thunks.
     constexpr int kNumberOfPatchIterations = 64;
     for (int i = 0; i < kNumberOfPatchIterations; ++i) {
-      TRACE("  patch slot " V8PRIxPTR_FMT " to thunk #%d\n",
-            slot_start_ + JumpTableAssembler::SlotIndexToOffset(slot_index_),
-            i % 2);
+      TRACE("  patcher %p patch slot " V8PRIxPTR_FMT " to thunk #%d\n", this,
+            slot_address, i % 2);
+      base::MutexGuard jump_table_guard(jump_table_mutex_);
       JumpTableAssembler::PatchJumpTableSlot(
           slot_start_, slot_index_, thunks_[i % 2], WasmCode::kFlushICache);
     }
-    TRACE("Patcher is stopping ...\n");
+    TRACE("Patcher %p is stopping ...\n", this);
   }
 
  private:
   Address slot_start_;
   uint32_t slot_index_;
   Address thunks_[2];
+  base::Mutex* jump_table_mutex_;
 };
 
 }  // namespace
@@ -199,9 +211,10 @@ class JumpTablePatcher : public v8::base::Thread {
 // one of the runners is currently executing the jump-table slot.
 TEST(JumpTablePatchingStress) {
   constexpr int kNumberOfRunnerThreads = 5;
+  constexpr int kNumberOfPatcherThreads = 3;
 
 #if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64
-  // We need the branches (from GenerateJumpTableThunk) to be within near-call
+  // We need the branches (from CompileJumpTableThunk) to be within near-call
   // range of the jump table slots. The address hint to AllocateAssemblerBuffer
   // is not reliable enough to guarantee that we can always achieve this with
   // separate allocations, so for Arm64 we generate all code in a single
@@ -227,29 +240,42 @@ TEST(JumpTablePatchingStress) {
     TRACE("Hammering on jump table slot #%d ...\n", slot);
     uint32_t slot_offset = JumpTableAssembler::JumpSlotIndexToOffset(slot);
     std::vector<std::unique_ptr<TestingAssemblerBuffer>> thunk_buffers;
-    Address thunk1 =
-        GenerateJumpTableThunk(slot_start + slot_offset, thunk_slot_buffer,
-                               &used_thunk_slots, &thunk_buffers);
-    Address thunk2 =
-        GenerateJumpTableThunk(slot_start + slot_offset, thunk_slot_buffer,
-                               &used_thunk_slots, &thunk_buffers);
-    TRACE("  generated thunk1: " V8PRIxPTR_FMT "\n", thunk1);
-    TRACE("  generated thunk2: " V8PRIxPTR_FMT "\n", thunk2);
-    JumpTableAssembler::PatchJumpTableSlot(slot_start, slot, thunk1,
-                                           WasmCode::kFlushICache);
-
-    for (auto& buf : thunk_buffers) buf->MakeExecutable();
-    // Start multiple runner threads and a patcher thread that hammer on the
-    // same jump-table slot concurrently.
+    // Patch the jump table slot to jump to itself. This will later be patched
+    // by the patchers.
+    JumpTableAssembler::PatchJumpTableSlot(
+        slot_start, slot, slot_start + slot_offset, WasmCode::kFlushICache);
+    // For each patcher, generate two thunks where this patcher can emit code
+    // which finally jumps back to {slot} in the jump table.
+    std::vector<Address> patcher_thunks;
+    for (int i = 0; i < 2 * kNumberOfPatcherThreads; ++i) {
+      Address thunk =
+          AllocateJumpTableThunk(slot_start + slot_offset, thunk_slot_buffer,
+                                 &used_thunk_slots, &thunk_buffers);
+      ZapCode(thunk, kThunkBufferSize);
+      patcher_thunks.push_back(thunk);
+      TRACE("  generated jump thunk: " V8PRIxPTR_FMT "\n",
+            patcher_thunks.back());
+    }
+
+    // Start multiple runner threads that execute the jump table slot
+    // concurrently.
     std::list<JumpTableRunner> runners;
     for (int runner = 0; runner < kNumberOfRunnerThreads; ++runner) {
       runners.emplace_back(slot_start + slot_offset, runner);
     }
-    JumpTablePatcher patcher(slot_start, slot, thunk1, thunk2);
+    // Start multiple patcher threads that concurrently generate code and
+    // insert jumps to that code into the jump table slot.
+    std::list<JumpTablePatcher> patchers;
+    // Only one patcher should modify the jump table at a time.
+    base::Mutex jump_table_mutex;
+    for (int i = 0; i < kNumberOfPatcherThreads; ++i) {
+      patchers.emplace_back(slot_start, slot, patcher_thunks[2 * i],
+                            patcher_thunks[2 * i + 1], &jump_table_mutex);
+    }
     global_stop_bit = 0;  // Signal runners to keep going.
     for (auto& runner : runners) CHECK(runner.Start());
-    CHECK(patcher.Start());
-    patcher.Join();
+    for (auto& patcher : patchers) CHECK(patcher.Start());
+    for (auto& patcher : patchers) patcher.Join();
     global_stop_bit = -1;  // Signal runners to stop.
     for (auto& runner : runners) runner.Join();
   }