Commit 226a4819 authored by Milad Farazmand, committed by Commit Bot

PPC: [wasm-simd] Allow simd load/store on unaligned addresses

lvx and stvx require 16-byte aligned addresses.
This CL enables loading and storing to addresses which are not
16-byte aligned.

Change-Id: I5635e857a979520822c8b30bb5477a159e97e6e5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2327648
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#69135}
parent 80396fd3
...@@ -1777,6 +1777,18 @@ void Assembler::mtvsrd(const Simd128Register rt, const Register ra) { ...@@ -1777,6 +1777,18 @@ void Assembler::mtvsrd(const Simd128Register rt, const Register ra) {
emit(MTVSRD | rt.code() * B21 | ra.code() * B16 | TX); emit(MTVSRD | rt.code() * B21 | ra.code() * B16 | TX);
} }
// Emits one LXVD instruction word. The target register |rt| and the two
// address registers of |src| (ra and rb) are packed into the B21, B16 and B11
// fields respectively.
void Assembler::lxvd(const Simd128Register rt, const MemOperand& src) {
  // Lowest bit of the encoding (TX); this emitter always sets it.
  // NOTE(review): presumably this selects the vector-register half of the VSX
  // register file for |rt| - confirm against the Power ISA.
  const int kTX = 1;
  const auto target_field = rt.code() * B21;
  const auto base_field = src.ra().code() * B16;
  const auto index_field = src.rb().code() * B11;
  emit(LXVD | target_field | base_field | index_field | kTX);
}
// Emits one STXVD instruction word. The source register |rt| and the two
// address registers of |dst| (ra and rb) are packed into the B21, B16 and B11
// fields respectively.
void Assembler::stxvd(const Simd128Register rt, const MemOperand& dst) {
  // Lowest bit of the encoding (SX); this emitter always sets it.
  // NOTE(review): presumably this selects the vector-register half of the VSX
  // register file for |rt| - confirm against the Power ISA.
  const int kSX = 1;
  const auto source_field = rt.code() * B21;
  const auto base_field = dst.ra().code() * B16;
  const auto index_field = dst.rb().code() * B11;
  emit(STXVD | source_field | base_field | index_field | kSX);
}
// Pseudo instructions. // Pseudo instructions.
void Assembler::nop(int type) { void Assembler::nop(int type) {
Register reg = r0; Register reg = r0;
......
...@@ -1019,6 +1019,8 @@ class Assembler : public AssemblerBase { ...@@ -1019,6 +1019,8 @@ class Assembler : public AssemblerBase {
void mfvsrd(const Register ra, const Simd128Register r); void mfvsrd(const Register ra, const Simd128Register r);
void mfvsrwz(const Register ra, const Simd128Register r); void mfvsrwz(const Register ra, const Simd128Register r);
void mtvsrd(const Simd128Register rt, const Register ra); void mtvsrd(const Simd128Register rt, const Register ra);
// Register-indexed SIMD load/store: the address comes from the ra and rb
// registers of the MemOperand. lxvd reads from |src| into |rt|; stxvd writes
// |rt| to |dst|.
void lxvd(const Simd128Register rt, const MemOperand& src);
void stxvd(const Simd128Register rt, const MemOperand& dst);
// Pseudo instructions // Pseudo instructions
......
...@@ -2044,7 +2044,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2044,7 +2044,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
bool is_atomic = i.InputInt32(2); bool is_atomic = i.InputInt32(2);
// lvx only supports MRR. // lvx only supports MRR.
DCHECK_EQ(mode, kMode_MRR); DCHECK_EQ(mode, kMode_MRR);
__ lvx(result, operand); // lvx needs the stack to be 16 byte aligned.
// We first use lxvd/stxvd to copy the content on an aligned address. lxvd
// itself reverses the lanes so it cannot be used as is.
__ lxvd(kScratchDoubleReg, operand);
__ mr(kScratchReg, sp);
__ ClearRightImm(
sp, sp,
Operand(base::bits::WhichPowerOfTwo(16))); // equivalent to &= -16
__ addi(sp, sp, Operand(-16));
__ li(r0, Operand(0));
__ stxvd(kScratchDoubleReg, MemOperand(sp, r0));
// Load it with correct lane ordering.
__ lvx(result, MemOperand(sp, r0));
__ mr(sp, kScratchReg);
if (is_atomic) __ lwsync(); if (is_atomic) __ lwsync();
DCHECK_EQ(LeaveRC, i.OutputRCBit()); DCHECK_EQ(LeaveRC, i.OutputRCBit());
break; break;
...@@ -2078,7 +2091,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2078,7 +2091,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (is_atomic) __ lwsync(); if (is_atomic) __ lwsync();
// stvx only supports MRR. // stvx only supports MRR.
DCHECK_EQ(mode, kMode_MRR); DCHECK_EQ(mode, kMode_MRR);
__ stvx(value, operand); // stvx needs the stack to be 16 byte aligned.
// We use lxvd/stxvd to store the content on an aligned address. stxvd
// itself reverses the lanes so it cannot be used as is.
__ mr(kScratchReg, sp);
__ ClearRightImm(
sp, sp,
Operand(base::bits::WhichPowerOfTwo(16))); // equivalent to &= -16
__ addi(sp, sp, Operand(-16));
__ li(r0, Operand(0));
__ stvx(value, MemOperand(sp, r0));
__ lxvd(kScratchDoubleReg, MemOperand(sp, r0));
__ stxvd(kScratchDoubleReg, operand);
__ mr(sp, kScratchReg);
if (is_atomic) __ sync(); if (is_atomic) __ sync();
DCHECK_EQ(LeaveRC, i.OutputRCBit()); DCHECK_EQ(LeaveRC, i.OutputRCBit());
break; break;
......
...@@ -634,6 +634,22 @@ void Decoder::DecodeExt1(Instruction* instr) { ...@@ -634,6 +634,22 @@ void Decoder::DecodeExt1(Instruction* instr) {
void Decoder::DecodeExt2(Instruction* instr) { void Decoder::DecodeExt2(Instruction* instr) {
// Some encodings are 10-1 bits, handle those first // Some encodings are 10-1 bits, handle those first
switch (EXT2 | (instr->BitField(10, 1))) { switch (EXT2 | (instr->BitField(10, 1))) {
case LVX: {
Format(instr, "lvx 'Dt, 'ra, 'rb");
return;
}
case STVX: {
Format(instr, "stvx 'Dt, 'ra, 'rb");
return;
}
case LXVD: {
Format(instr, "lxvd 'Dt, 'ra, 'rb");
return;
}
case STXVD: {
Format(instr, "stxvd 'Dt, 'ra, 'rb");
return;
}
case SRWX: { case SRWX: {
Format(instr, "srw'. 'ra, 'rs, 'rb"); Format(instr, "srw'. 'ra, 'rs, 'rb");
return; return;
...@@ -942,10 +958,6 @@ void Decoder::DecodeExt2(Instruction* instr) { ...@@ -942,10 +958,6 @@ void Decoder::DecodeExt2(Instruction* instr) {
Format(instr, "sthux 'rs, 'ra, 'rb"); Format(instr, "sthux 'rs, 'ra, 'rb");
return; return;
} }
case STVX: {
Format(instr, "stvx 'Dt, 'ra, 'rb");
return;
}
case LWZX: { case LWZX: {
Format(instr, "lwzx 'rt, 'ra, 'rb"); Format(instr, "lwzx 'rt, 'ra, 'rb");
return; return;
...@@ -990,10 +1002,6 @@ void Decoder::DecodeExt2(Instruction* instr) { ...@@ -990,10 +1002,6 @@ void Decoder::DecodeExt2(Instruction* instr) {
Format(instr, "lwarx 'rt, 'ra, 'rb"); Format(instr, "lwarx 'rt, 'ra, 'rb");
return; return;
} }
case LVX: {
Format(instr, "lvx 'Dt, 'ra, 'rb");
return;
}
#if V8_TARGET_ARCH_PPC64 #if V8_TARGET_ARCH_PPC64
case LDX: { case LDX: {
Format(instr, "ldx 'rt, 'ra, 'rb"); Format(instr, "ldx 'rt, 'ra, 'rb");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment