Commit f3a5c36a authored by Clemens Backes's avatar Clemens Backes Committed by Commit Bot

Revert "[wasm-simd][liftoff] Add S128 case for stack movements"

This reverts commit 5cfe053e.

Reason for revert: "liftoff-simd-params" also fails on no-sse :/

Original change's description:
> [wasm-simd][liftoff] Add S128 case for stack movements
> 
> The two cases we are fixing here are Construct and
> LoadCallerFrameSlot, which are closely related.
> 
> Construct is called during PrepareCall, where we build up
> LiftoffStackSlots when we need to move an arg from caller's stack frame
> into callee's stack frame. LoadCallerFrameSlot is the parallel to
> this, called in ProcessParameter during decoding of the callee's
> function body.
> 
> In most cases, Construct needs a new case to handle kWasmS128, and calls
> the relevant assembler to push a s128 onto the stack.
> 
> ARM64 requires 16-byte alignment of sp, so we need to Claim the right
> number of kXRegSize slots first, which requires
> us traversing the list of slots to figure out how many s128 values there
> are. This is a straightforward way to fix this, if efficiency is a
> problem, we can change LiftOffStackSlots::Add to sum up the slot sizes.
> 
> On IA32, pushing s128 values will require 4 calls to push. Instead, we
> use a sub and two movdqu, which will generate less code in most cases.
> 
> On x64, there is no 128-bit push, so we call push twice.
> 
> Bug: v8:9909
> Change-Id: I3af35b8462ea9c3b9b2d90800c37d11b5e95be59
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2015945
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#65956}

TBR=clemensb@chromium.org,zhin@chromium.org,joey.gouly@arm.com

Change-Id: Ib3c5a088e2d85baf1d8b143272844fb5ebb33c57
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:9909
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2017724Reviewed-by: 's avatarClemens Backes <clemensb@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65960}
parent b6cc74e0
......@@ -577,13 +577,6 @@ void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
case kWasmF64:
vldr(dst.fp(), src);
break;
case kWasmS128: {
UseScratchRegisterScope temps(this);
Register addr = liftoff::CalculateActualAddress(this, &temps, src.rn(),
no_reg, src.offset());
vld1(Neon8, NeonListOperand(dst.low_fp(), 2), NeonMemOperand(addr));
break;
}
default:
UNREACHABLE();
}
......@@ -1709,16 +1702,6 @@ void LiftoffStackSlots::Construct() {
asm_->vldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
asm_->vpush(scratch);
} break;
case kWasmS128: {
MemOperand mem_op = liftoff::GetStackSlot(slot.src_offset_);
UseScratchRegisterScope temps(asm_);
Register addr = liftoff::CalculateActualAddress(
asm_, &temps, mem_op.rn(), no_reg, mem_op.offset());
QwNeonRegister scratch = temps.AcquireQ();
asm_->vld1(Neon8, NeonListOperand(scratch), NeonMemOperand(addr));
asm_->vpush(scratch);
break;
}
default:
UNREACHABLE();
}
......@@ -1740,9 +1723,6 @@ void LiftoffStackSlots::Construct() {
case kWasmF64:
asm_->vpush(src.reg().fp());
break;
case kWasmS128:
asm_->vpush(liftoff::GetSimd128Register(src.reg().low_fp()));
break;
default:
UNREACHABLE();
}
......
......@@ -1152,15 +1152,12 @@ void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
void LiftoffAssembler::DebugBreak() { debug("DebugBreak", 0, BREAK); }
void LiftoffStackSlots::Construct() {
size_t num_slots = 0;
for (auto& slot : slots_) {
num_slots += slot.src_.type() == kWasmS128 ? 2 : 1;
}
size_t slot_count = slots_.size();
// The stack pointer is required to be quadword aligned.
asm_->Claim(RoundUp(num_slots, 2));
size_t poke_offset = num_slots * kXRegSize;
asm_->Claim(RoundUp(slot_count, 2));
size_t slot_index = 0;
for (auto& slot : slots_) {
poke_offset -= slot.src_.type() == kWasmS128 ? kXRegSize * 2 : kXRegSize;
size_t poke_offset = (slot_count - slot_index - 1) * kXRegSize;
switch (slot.src_.loc()) {
case LiftoffAssembler::VarState::kStack: {
UseScratchRegisterScope temps(asm_);
......@@ -1187,6 +1184,7 @@ void LiftoffStackSlots::Construct() {
}
break;
}
slot_index++;
}
}
......
......@@ -54,9 +54,6 @@ inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, Register base,
case kWasmF64:
assm->movsd(dst.fp(), src);
break;
case kWasmS128:
assm->movdqu(dst.fp(), src);
break;
default:
UNREACHABLE();
}
......@@ -101,10 +98,6 @@ inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueType type) {
assm->AllocateStackSpace(sizeof(double));
assm->movsd(Operand(esp, 0), reg.fp());
break;
case kWasmS128:
assm->AllocateStackSpace(sizeof(double) * 2);
assm->movdqu(Operand(esp, 0), reg.fp());
break;
default:
UNREACHABLE();
}
......@@ -2040,15 +2033,6 @@ void LiftoffStackSlots::Construct() {
const LiftoffAssembler::VarState& src = slot.src_;
switch (src.loc()) {
case LiftoffAssembler::VarState::kStack:
// The combination of AllocateStackSpace and 2 movdqu is usually smaller
// in code size than doing 4 pushes.
if (src.type() == kWasmS128) {
asm_->AllocateStackSpace(sizeof(double) * 2);
asm_->movdqu(liftoff::kScratchDoubleReg,
liftoff::GetStackSlot(slot.src_offset_));
asm_->movdqu(Operand(esp, 0), liftoff::kScratchDoubleReg);
break;
}
if (src.type() == kWasmF64) {
DCHECK_EQ(kLowWord, slot.half_);
asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, kHighWord));
......
......@@ -71,9 +71,6 @@ inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, Operand src,
case kWasmF64:
assm->Movsd(dst.fp(), src);
break;
case kWasmS128:
assm->Movdqu(dst.fp(), src);
break;
default:
UNREACHABLE();
}
......@@ -113,10 +110,6 @@ inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueType type) {
assm->AllocateStackSpace(kSystemPointerSize);
assm->Movsd(Operand(rsp, 0), reg.fp());
break;
case kWasmS128:
assm->AllocateStackSpace(kSystemPointerSize * 2);
assm->Movdqu(Operand(rsp, 0), reg.fp());
break;
default:
UNREACHABLE();
}
......@@ -1763,11 +1756,6 @@ void LiftoffStackSlots::Construct() {
// extended.
asm_->movl(kScratchRegister, liftoff::GetStackSlot(slot.src_offset_));
asm_->pushq(kScratchRegister);
} else if (src.type() == kWasmS128) {
// Since offsets are subtracted from sp, we need a smaller offset to
// push the top of a s128 value.
asm_->pushq(liftoff::GetStackSlot(slot.src_offset_ - 8));
asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
} else {
// For all other types, just push the whole (8-byte) stack slot.
// This is also ok for f32 values (even though we copy 4 uninitialized
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --experimental-wasm-simd
load('test/mjsunit/wasm/wasm-module-builder.js');
// This test case tries to exercise SIMD stack to stack movements by creating
// a function that has many parameters.
(function() {
const builder = new WasmModuleBuilder();
// At this point we have limited support for SIMD operations, but we can load
// and store s128 values. So this memory will be used for reading and writing
// s128 values and we will assert expected values from JS.
builder.addImportedMemory('m', 'imported_mem', 1, 2);
builder.addType(makeSig(new Array(18).fill(kWasmS128), []));
builder.addFunction(undefined, makeSig([], []))
.addLocals({s128_count: 9})
.addBodyWithEnd([
// These will all be args to the callee.
// Load first arg from memory, this was written with values from JS.
kExprI32Const, 0,
kSimdPrefix, kExprS128LoadMem, 0, 0,
kExprLocalGet, 0,
kExprLocalGet, 1,
kExprLocalGet, 2,
kExprLocalGet, 3,
kExprLocalGet, 4,
kExprLocalGet, 5,
kExprLocalGet, 6,
kExprLocalGet, 7,
kExprLocalGet, 8,
kExprLocalGet, 0,
kExprLocalGet, 1,
kExprLocalGet, 2,
kExprLocalGet, 3,
kExprLocalGet, 4,
kExprLocalGet, 5,
kExprLocalGet, 6,
// Load last s128 from memory, this was written with values from JS.
kExprI32Const, 16,
kSimdPrefix, kExprS128LoadMem, 0, 0,
kExprCallFunction, 0x01,
kExprEnd,
]);
builder.addFunction(undefined, 0 /* sig */)
.addBodyWithEnd([
kExprI32Const, 32,
kExprLocalGet, 0,
kSimdPrefix, kExprS128StoreMem, 0, 0,
kExprI32Const, 48,
kExprLocalGet, 17,
kSimdPrefix, kExprS128StoreMem, 0, 0,
kExprEnd,
]);
builder.addExport('main', 0);
var memory = new WebAssembly.Memory({initial:1, maximum:2});
const instance = builder.instantiate({m: {imported_mem: memory}});
// We write sentinel values to two s128 values at the start of the memory.
// Function 1 will read these values from memory, and pass them as the first
// and last arg to function 2. Function 2 then write these values to memory
// after these two s128 values.
const arr = new Uint32Array(memory.buffer);
for (let i = 0; i < 8; i++) {
arr[0] = i * 2;
}
instance.exports.main();
for (let i = 0; i < 8; i++) {
assertEquals(arr[i], arr[i + 8]);
}
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment