Commit 64c52438 authored by Milad Farazmand, committed by Commit Bot

PPC: [wasm-simd] Add support for Simd128 moves and swaps

Change-Id: Ie2668026c5b55af8813f159277bdbc83116c1a00
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2336776
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#69227}
parent b4eef089
@@ -2828,6 +2828,24 @@ void TurboAssembler::LoadSingleU(DoubleRegister dst, const MemOperand& mem,
   }
 }

+void TurboAssembler::LoadSimd128(Simd128Register dst, const MemOperand& mem,
+                                 Register ScratchReg,
+                                 Simd128Register ScratchDoubleReg) {
+  // lvx needs the stack to be 16 byte aligned.
+  // We first use lxvd/stxvd to copy the content on an aligned address. lxvd
+  // itself reverses the lanes so it cannot be used as is.
+  lxvd(ScratchDoubleReg, mem);
+  mr(ScratchReg, sp);
+  ClearRightImm(
+      sp, sp,
+      Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+  addi(sp, sp, Operand(-16));
+  stxvd(ScratchDoubleReg, MemOperand(r0, sp));
+  // Load it with correct lane ordering.
+  lvx(dst, MemOperand(r0, sp));
+  mr(sp, ScratchReg);
+}
+
 void TurboAssembler::StoreDouble(DoubleRegister src, const MemOperand& mem,
                                  Register scratch) {
   Register base = mem.ra();
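A note on the 16-byte alignment dance in LoadSimd128: WhichPowerOfTwo(16) is 4, so ClearRightImm clears the low four bits of sp (rounding it down to a 16-byte boundary, the "&= -16" in the comment), and the following addi of -16 reserves a fresh aligned scratch slot below the live stack. A minimal standalone sketch of the same address arithmetic, with illustrative names that are not part of the commit:

#include <cassert>
#include <cstdint>

// Sketch of the scratch-slot computation performed on sp above.
uintptr_t AlignedScratchSlot(uintptr_t sp) {
  uintptr_t aligned = sp & ~uintptr_t{15};  // ClearRightImm: sp &= -16
  uintptr_t slot = aligned - 16;            // addi(sp, sp, Operand(-16))
  assert(slot % 16 == 0);                   // lvx/stvx require this alignment
  return slot;
}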
@@ -2880,6 +2898,23 @@ void TurboAssembler::StoreSingleU(DoubleRegister src, const MemOperand& mem,
   }
 }

+void TurboAssembler::StoreSimd128(Simd128Register src, const MemOperand& mem,
+                                  Register ScratchReg,
+                                  Simd128Register ScratchDoubleReg) {
+  // stvx needs the stack to be 16 byte aligned.
+  // We use lxvd/stxvd to store the content on an aligned address. stxvd
+  // itself reverses the lanes so it cannot be used as is.
+  mr(ScratchReg, sp);
+  ClearRightImm(
+      sp, sp,
+      Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+  addi(sp, sp, Operand(-16));
+  stvx(src, MemOperand(r0, sp));
+  lxvd(ScratchDoubleReg, MemOperand(r0, sp));
+  mr(sp, ScratchReg);
+  stxvd(ScratchDoubleReg, mem);
+}
+
 Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
                                    Register reg4, Register reg5,
                                    Register reg6) {
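Why StoreSimd128 round-trips through stvx, lxvd, and stxvd: per the comments above, lxvd and stxvd each reverse the 64-bit lanes, so the pair composes to the identity, while stvx/lvx preserve lane order but demand a 16-byte-aligned address. A toy model of that double-reversal identity, assuming exactly the lane-swapping behavior the comments describe:

#include <array>
#include <cassert>
#include <cstdint>

using Vec128 = std::array<uint64_t, 2>;

// Toy model: lxvd/stxvd swap the two 64-bit lanes; applying both is a no-op.
Vec128 SwapLanes(const Vec128& v) { return {v[1], v[0]}; }

void CheckRoundTrip() {
  const Vec128 value = {1, 2};
  Vec128 in_register = SwapLanes(value);    // lxvd: lanes arrive reversed
  Vec128 in_memory = SwapLanes(in_register);  // stxvd: reversed again on store
  assert(in_memory == value);               // lane order preserved end to end
}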
@@ -3006,6 +3041,53 @@ void TurboAssembler::SwapDouble(MemOperand src, MemOperand dst,
   StoreDouble(scratch_1, src, r0);
 }

+void TurboAssembler::SwapSimd128(Simd128Register src, Simd128Register dst,
+                                 Simd128Register scratch) {
+  if (src == dst) return;
+  vor(scratch, src, src);
+  vor(src, dst, dst);
+  vor(dst, scratch, scratch);
+}
+
+void TurboAssembler::SwapSimd128(Simd128Register src, MemOperand dst,
+                                 Simd128Register scratch) {
+  DCHECK(!AreAliased(src, scratch));
+  // push d0, to be used as scratch
+  addi(sp, sp, Operand(-kSimd128Size));
+  StoreSimd128(d0, MemOperand(r0, sp), r0, scratch);
+  mov(ip, Operand(dst.offset()));
+  LoadSimd128(d0, MemOperand(dst.ra(), ip), r0, scratch);
+  StoreSimd128(src, MemOperand(dst.ra(), ip), r0, scratch);
+  vor(src, d0, d0);
+  // restore d0
+  LoadSimd128(d0, MemOperand(r0, sp), ip, scratch);
+  addi(sp, sp, Operand(kSimd128Size));
+}
+
+void TurboAssembler::SwapSimd128(MemOperand src, MemOperand dst,
+                                 Simd128Register scratch) {
+  // push d0 and d1, to be used as scratch
+  addi(sp, sp, Operand(2 * -kSimd128Size));
+  StoreSimd128(d0, MemOperand(r0, sp), ip, scratch);
+  li(ip, Operand(kSimd128Size));
+  StoreSimd128(d1, MemOperand(ip, sp), r0, scratch);
+  mov(ip, Operand(src.offset()));
+  LoadSimd128(d0, MemOperand(src.ra(), ip), r0, scratch);
+  mov(ip, Operand(dst.offset()));
+  LoadSimd128(d1, MemOperand(dst.ra(), ip), r0, scratch);
+  StoreSimd128(d0, MemOperand(dst.ra(), ip), r0, scratch);
+  mov(ip, Operand(src.offset()));
+  StoreSimd128(d1, MemOperand(src.ra(), ip), r0, scratch);
+  // restore d0 and d1
+  LoadSimd128(d0, MemOperand(r0, sp), ip, scratch);
+  li(ip, Operand(kSimd128Size));
+  LoadSimd128(d1, MemOperand(ip, sp), r0, scratch);
+  addi(sp, sp, Operand(2 * kSimd128Size));
+}
+
 void TurboAssembler::ResetSpeculationPoisonRegister() {
   mov(kSpeculationPoisonRegister, Operand(-1));
 }
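The register-register SwapSimd128 leans on vor(dst, src, src) being the usual VMX register move (x | x == x), so the swap is a plain three-move rotation through the scratch register. The same shape in scalar C++, purely as illustration:

// Mirrors: vor(scratch, src, src); vor(src, dst, dst); vor(dst, scratch, scratch);
template <typename T>
void SwapViaScratch(T& src, T& dst, T& scratch) {
  if (&src == &dst) return;  // matches the src == dst early-out above
  scratch = src;
  src = dst;
  dst = scratch;
}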
...
@@ -153,6 +153,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   void LoadFloat32(DoubleRegister dst, const MemOperand& mem,
                    Register scratch = no_reg);
   void LoadDoubleLiteral(DoubleRegister result, Double value, Register scratch);
+  void LoadSimd128(Simd128Register dst, const MemOperand& mem,
+                   Register ScratchReg, Simd128Register ScratchDoubleReg);

   // load a literal signed int value <value> to GPR <dst>
   void LoadIntLiteral(Register dst, int value);
...@@ -175,6 +177,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -175,6 +177,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Register scratch = no_reg); Register scratch = no_reg);
void StoreSingleU(DoubleRegister src, const MemOperand& mem, void StoreSingleU(DoubleRegister src, const MemOperand& mem,
Register scratch = no_reg); Register scratch = no_reg);
void StoreSimd128(Simd128Register src, const MemOperand& mem,
Register ScratchReg, Simd128Register ScratchDoubleReg);
void Cmpi(Register src1, const Operand& src2, Register scratch, void Cmpi(Register src1, const Operand& src2, Register scratch,
CRegister cr = cr7); CRegister cr = cr7);
@@ -326,6 +330,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   void SwapDouble(DoubleRegister src, MemOperand dst, DoubleRegister scratch);
   void SwapDouble(MemOperand src, MemOperand dst, DoubleRegister scratch_0,
                   DoubleRegister scratch_1);
+  void SwapSimd128(Simd128Register src, Simd128Register dst,
+                   Simd128Register scratch);
+  void SwapSimd128(Simd128Register src, MemOperand dst,
+                   Simd128Register scratch);
+  void SwapSimd128(MemOperand src, MemOperand dst, Simd128Register scratch);

   // Before calling a C-function from generated code, align arguments on stack.
   // After aligning the frame, non-register arguments must be stored in
...
@@ -108,7 +108,8 @@ class PPCOperandConverter final : public InstructionOperandConverter {
     return MemoryOperand(mode, &first_index);
   }

-  MemOperand ToMemOperand(InstructionOperand* op) const {
+  MemOperand ToMemOperand(InstructionOperand* op,
+                          AddressingMode mode = kMode_None) const {
     DCHECK_NOT_NULL(op);
     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
     return SlotToMemOperand(AllocatedOperand::cast(op)->index());
@@ -1739,16 +1740,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kPPC_Push:
       if (instr->InputAt(0)->IsFPRegister()) {
         LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
-        if (op->representation() == MachineRepresentation::kFloat64) {
-          __ StoreDoubleU(i.InputDoubleRegister(0),
-                          MemOperand(sp, -kDoubleSize), r0);
-          frame_access_state()->IncreaseSPDelta(kDoubleSize /
-                                                kSystemPointerSize);
-        } else {
-          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
-          __ StoreSingleU(i.InputDoubleRegister(0),
-                          MemOperand(sp, -kSystemPointerSize), r0);
-          frame_access_state()->IncreaseSPDelta(1);
+        switch (op->representation()) {
+          case MachineRepresentation::kFloat32:
+            __ StoreSingleU(i.InputDoubleRegister(0),
+                            MemOperand(sp, -kSystemPointerSize), r0);
+            frame_access_state()->IncreaseSPDelta(1);
+            break;
+          case MachineRepresentation::kFloat64:
+            __ StoreDoubleU(i.InputDoubleRegister(0),
+                            MemOperand(sp, -kDoubleSize), r0);
+            frame_access_state()->IncreaseSPDelta(kDoubleSize /
+                                                  kSystemPointerSize);
+            break;
+          case MachineRepresentation::kSimd128: {
+            __ addi(sp, sp, Operand(-kSimd128Size));
+            __ StoreSimd128(i.InputDoubleRegister(0), MemOperand(r0, sp), r0,
+                            kScratchDoubleReg);
+            frame_access_state()->IncreaseSPDelta(kSimd128Size /
+                                                  kSystemPointerSize);
+            break;
+          }
+          default:
+            UNREACHABLE();
+            break;
         }
       } else {
         __ StorePU(i.InputRegister(0), MemOperand(sp, -kSystemPointerSize), r0);
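In the new kSimd128 push case, the stack delta is bookkept in pointer-sized slots: assuming the usual PPC64 constants (kSimd128Size = 16, kSystemPointerSize = 8), IncreaseSPDelta receives 2, matching the 16-byte addi above. A quick compile-time check under those assumed values, mirroring constants V8 defines elsewhere:

// Assumed PPC64 values, for illustration only.
constexpr int kSystemPointerSize = 8;
constexpr int kSimd128Size = 16;
static_assert(kSimd128Size % kSystemPointerSize == 0,
              "a Simd128 push must cover a whole number of stack slots");
static_assert(kSimd128Size / kSystemPointerSize == 2,
              "one 128-bit push moves sp by two pointer-sized slots");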
@@ -1781,10 +1795,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
         if (op->representation() == MachineRepresentation::kFloat64) {
           __ StoreDouble(i.InputDoubleRegister(0),
                          MemOperand(sp, slot * kSystemPointerSize), r0);
-        } else {
-          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
           __ StoreSingle(i.InputDoubleRegister(0),
                          MemOperand(sp, slot * kSystemPointerSize), r0);
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ mov(ip, Operand(slot * kSystemPointerSize));
+          __ StoreSimd128(i.InputDoubleRegister(0), MemOperand(ip, sp), r0,
+                          kScratchDoubleReg);
         }
       } else {
         __ StoreP(i.InputRegister(0), MemOperand(sp, slot * kSystemPointerSize),
@@ -2044,20 +2062,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       bool is_atomic = i.InputInt32(2);
       // lvx only supports MRR.
       DCHECK_EQ(mode, kMode_MRR);
-      // lvx needs the stack to be 16 byte aligned.
-      // We first use lxvd/stxvd to copy the content on an aligned address. lxvd
-      // itself reverses the lanes so it cannot be used as is.
-      __ lxvd(kScratchDoubleReg, operand);
-      __ mr(kScratchReg, sp);
-      __ ClearRightImm(
-          sp, sp,
-          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
-      __ addi(sp, sp, Operand(-16));
-      __ li(r0, Operand(0));
-      __ stxvd(kScratchDoubleReg, MemOperand(sp, r0));
-      // Load it with correct lane ordering.
-      __ lvx(result, MemOperand(sp, r0));
-      __ mr(sp, kScratchReg);
+      __ LoadSimd128(result, operand, r0, kScratchDoubleReg);
       if (is_atomic) __ lwsync();
       DCHECK_EQ(LeaveRC, i.OutputRCBit());
       break;
@@ -2091,19 +2096,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       if (is_atomic) __ lwsync();
       // stvx only supports MRR.
       DCHECK_EQ(mode, kMode_MRR);
-      // stvx needs the stack to be 16 byte aligned.
-      // We use lxvd/stxvd to store the content on an aligned address. stxvd
-      // itself reverses the lanes so it cannot be used as is.
-      __ mr(kScratchReg, sp);
-      __ ClearRightImm(
-          sp, sp,
-          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
-      __ addi(sp, sp, Operand(-16));
-      __ li(r0, Operand(0));
-      __ stvx(value, MemOperand(sp, r0));
-      __ lxvd(kScratchDoubleReg, MemOperand(sp, r0));
-      __ stxvd(kScratchDoubleReg, operand);
-      __ mr(sp, kScratchReg);
+      __ StoreSimd128(value, operand, r0, kScratchDoubleReg);
       if (is_atomic) __ sync();
       DCHECK_EQ(LeaveRC, i.OutputRCBit());
       break;
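Besides deduplicating the inline sequences, the switch to the LoadSimd128/StoreSimd128 helpers drops the old li(r0, Operand(0)): the helpers address the scratch slot as MemOperand(r0, sp), relying on the PowerPC rule that r0 in the base (RA) position of an indexed access reads as literal zero, so the effective address is simply sp. A toy model of that addressing rule, for illustration only:

#include <cstdint>

// X-form effective address: EA = (RA == 0 ? 0 : GPR[RA]) + GPR[RB].
uint64_t EffectiveAddress(int ra, int rb, const uint64_t gpr[32]) {
  uint64_t base = (ra == 0) ? 0 : gpr[ra];  // r0 as base means "zero", not GPR0
  return base + gpr[rb];
}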
@@ -3877,17 +3870,31 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
         }
       }
     } else if (source->IsFPRegister()) {
-      DoubleRegister src = g.ToDoubleRegister(source);
-      if (destination->IsFPRegister()) {
-        DoubleRegister dst = g.ToDoubleRegister(destination);
-        __ Move(dst, src);
-      } else {
-        DCHECK(destination->IsFPStackSlot());
-        LocationOperand* op = LocationOperand::cast(source);
-        if (op->representation() == MachineRepresentation::kFloat64) {
-          __ StoreDouble(src, g.ToMemOperand(destination), r0);
-        } else {
-          __ StoreSingle(src, g.ToMemOperand(destination), r0);
-        }
-      }
+      MachineRepresentation rep = LocationOperand::cast(source)->representation();
+      if (rep == MachineRepresentation::kSimd128) {
+        if (destination->IsSimd128Register()) {
+          __ vor(g.ToSimd128Register(destination), g.ToSimd128Register(source),
+                 g.ToSimd128Register(source));
+        } else {
+          DCHECK(destination->IsSimd128StackSlot());
+          MemOperand dst = g.ToMemOperand(destination);
+          __ mov(ip, Operand(dst.offset()));
+          __ StoreSimd128(g.ToSimd128Register(source), MemOperand(dst.ra(), ip),
+                          r0, kScratchDoubleReg);
+        }
+      } else {
+        DoubleRegister src = g.ToDoubleRegister(source);
+        if (destination->IsFPRegister()) {
+          DoubleRegister dst = g.ToDoubleRegister(destination);
+          __ Move(dst, src);
+        } else {
+          DCHECK(destination->IsFPStackSlot());
+          LocationOperand* op = LocationOperand::cast(source);
+          if (op->representation() == MachineRepresentation::kFloat64) {
+            __ StoreDouble(src, g.ToMemOperand(destination), r0);
+          } else {
+            __ StoreSingle(src, g.ToMemOperand(destination), r0);
+          }
+        }
+      }
     } else if (source->IsFPStackSlot()) {
@@ -3897,8 +3904,14 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
       LocationOperand* op = LocationOperand::cast(source);
       if (op->representation() == MachineRepresentation::kFloat64) {
         __ LoadDouble(g.ToDoubleRegister(destination), src, r0);
-      } else {
+      } else if (op->representation() == MachineRepresentation::kFloat32) {
         __ LoadSingle(g.ToDoubleRegister(destination), src, r0);
+      } else {
+        DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+        MemOperand src = g.ToMemOperand(source);
+        __ mov(ip, Operand(src.offset()));
+        __ LoadSimd128(g.ToSimd128Register(destination),
+                       MemOperand(src.ra(), ip), r0, kScratchDoubleReg);
       }
     } else {
       LocationOperand* op = LocationOperand::cast(source);
@@ -3906,9 +3919,23 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
       if (op->representation() == MachineRepresentation::kFloat64) {
         __ LoadDouble(temp, src, r0);
         __ StoreDouble(temp, g.ToMemOperand(destination), r0);
-      } else {
+      } else if (op->representation() == MachineRepresentation::kFloat32) {
         __ LoadSingle(temp, src, r0);
         __ StoreSingle(temp, g.ToMemOperand(destination), r0);
+      } else {
+        DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+        // push d0, to be used as scratch
+        __ addi(sp, sp, Operand(-kSimd128Size));
+        __ StoreSimd128(d0, MemOperand(r0, sp), r0, kScratchDoubleReg);
+        MemOperand src = g.ToMemOperand(source);
+        MemOperand dst = g.ToMemOperand(destination);
+        __ mov(ip, Operand(src.offset()));
+        __ LoadSimd128(d0, MemOperand(src.ra(), ip), r0, kScratchDoubleReg);
+        __ mov(ip, Operand(dst.offset()));
+        __ StoreSimd128(d0, MemOperand(dst.ra(), ip), r0, kScratchDoubleReg);
+        // restore d0
+        __ LoadSimd128(d0, MemOperand(r0, sp), ip, kScratchDoubleReg);
+        __ addi(sp, sp, Operand(kSimd128Size));
       }
     }
   } else {
@@ -3963,8 +3990,20 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
       DCHECK(destination->IsDoubleStackSlot());
       __ SwapDouble(g.ToMemOperand(source), g.ToMemOperand(destination),
                     kScratchDoubleReg, d0);
     } else if (source->IsSimd128Register()) {
-      UNREACHABLE();
+      Simd128Register src = g.ToSimd128Register(source);
+      if (destination->IsSimd128Register()) {
+        __ SwapSimd128(src, g.ToSimd128Register(destination), kScratchDoubleReg);
+      } else {
+        DCHECK(destination->IsSimd128StackSlot());
+        __ SwapSimd128(src, g.ToMemOperand(destination), kScratchDoubleReg);
+      }
+    } else if (source->IsSimd128StackSlot()) {
+      DCHECK(destination->IsSimd128StackSlot());
+      __ SwapSimd128(g.ToMemOperand(source), g.ToMemOperand(destination),
+                     kScratchDoubleReg);
     } else {
       UNREACHABLE();
     }
...