Commit 9cc41406 authored by Milad Fa, committed by V8 LUCI CQ

S390 [simd]: Implement vector load and zero

This CL takes advantage of the z15 `load byte reverse element`
instruction to optimize Simd Load and Zero opcodes.

On the simulator we only run `load element` as reversing is
not required.

Change-Id: I868bda865249cdc525f804c8ddf4d45df5977a86
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3132965
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#76610}
parent 2e5e2f15
......@@ -3910,6 +3910,7 @@ void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
}
}
// Vector LE Load and Transform instructions.
void TurboAssembler::LoadAndSplat8x16LE(Simd128Register dst,
const MemOperand& mem) {
vlrep(dst, mem, Condition(0));
......@@ -3960,6 +3961,26 @@ LOAD_EXTEND_LIST(LOAD_EXTEND)
#undef LOAD_EXTEND
#undef LOAD_EXTEND
// Loads the little-endian 32-bit value at |mem| into word element 3 of |dst|,
// leaving every other bit of |dst| zero.
// NOTE(review): element 3 is presumably the slot that V8's s390 lane layout
// maps to Wasm lane 0 -- confirm against the other LE lane helpers.
void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem) {
  // XOR-ing the register with itself clears all of |dst| before the insert.
  vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
  if (!CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
    // Fallback without the byte-reversing element load: do the little-endian
    // load into r1 (which is clobbered), then insert it as a word (element
    // size code 2) at element index 3.
    LoadU32LE(r1, mem);
    vlvg(dst, r1, MemOperand(r0, 3), Condition(2));
  } else {
    // Single instruction: byte-reversed word load straight into element 3.
    vlebrf(dst, mem, Condition(3));
  }
}
// Loads the little-endian 64-bit value at |mem| into doubleword element 1 of
// |dst|, leaving every other bit of |dst| zero.
// NOTE(review): element 1 is presumably the slot that V8's s390 lane layout
// maps to Wasm lane 0 -- confirm against the other LE lane helpers.
void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem) {
  // XOR-ing the register with itself clears all of |dst| before the insert.
  vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
  if (!CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
    // Fallback without the byte-reversing element load: do the little-endian
    // load into r1 (which is clobbered), then insert it as a doubleword
    // (element size code 3) at element index 1.
    LoadU64LE(r1, mem);
    vlvg(dst, r1, MemOperand(r0, 1), Condition(3));
  } else {
    // Single instruction: byte-reversed doubleword load into element 1.
    vlebrg(dst, mem, Condition(1));
  }
}
#else
void TurboAssembler::LoadU64LE(Register dst, const MemOperand& mem,
Register scratch) {
......@@ -4032,6 +4053,7 @@ void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
StoreV128(src, mem, scratch1);
}
// Vector LE Load and Transform instructions.
#define LOAD_SPLAT_LIST(V) \
V(64x2, 3) \
V(32x4, 2) \
......@@ -4066,6 +4088,16 @@ LOAD_EXTEND_LIST(LOAD_EXTEND)
#undef LOAD_EXTEND
#undef LOAD_EXTEND
// Variant for the configuration where no byte reversal is needed: clears
// |dst| and then loads the 32-bit value at |mem| into word element 3 with a
// plain vector load element.
void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem) {
  const Condition zero_field = Condition(0);
  const Condition target_element = Condition(3);
  // dst ^ dst == 0, so this wipes the whole register before the element load.
  vx(dst, dst, dst, zero_field, zero_field, zero_field);
  vlef(dst, mem, target_element);
}
// Variant for the configuration where no byte reversal is needed: clears
// |dst| and then loads the 64-bit value at |mem| into doubleword element 1
// with a plain vector load element.
void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem) {
  const Condition zero_field = Condition(0);
  const Condition target_element = Condition(1);
  // dst ^ dst == 0, so this wipes the whole register before the element load.
  vx(dst, dst, dst, zero_field, zero_field, zero_field);
  vleg(dst, mem, target_element);
}
#endif
// Load And Test (Reg <- Reg)
......
......@@ -392,6 +392,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Register scratch1);
void LoadF64LE(DoubleRegister dst, const MemOperand& opnd, Register scratch);
void LoadF32LE(DoubleRegister dst, const MemOperand& opnd, Register scratch);
// Vector LE Load and Transform instructions.
void LoadAndSplat64x2LE(Simd128Register dst, const MemOperand& mem);
void LoadAndSplat32x4LE(Simd128Register dst, const MemOperand& mem);
void LoadAndSplat16x8LE(Simd128Register dst, const MemOperand& mem);
......@@ -402,6 +403,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void LoadAndExtend16x4SLE(Simd128Register dst, const MemOperand& mem);
void LoadAndExtend32x2ULE(Simd128Register dst, const MemOperand& mem);
void LoadAndExtend32x2SLE(Simd128Register dst, const MemOperand& mem);
// Load a 32-bit / 64-bit value from |mem| into a single element of |dst|
// (little-endian memory layout) and zero all remaining bits of |dst|.
void LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem);
void LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem);
// Load And Test
void LoadAndTest32(Register dst, Register src);
......
......@@ -3439,6 +3439,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
#undef LOAD_EXTEND
// Shared body for the SIMD load-and-zero opcodes. It decodes the
// instruction's memory operand, fetches the output SIMD register and emits
// LoadV<type>ZeroLE(dst, operand) through the `__` assembler shorthand.
// The macro declares locals (mode, operand, dst), so every use has to live
// inside its own braced case scope.
#define LOAD_AND_ZERO(type) \
AddressingMode mode = kMode_None; \
MemOperand operand = i.MemoryOperand(&mode); \
Simd128Register dst = i.OutputSimd128Register(); \
__ LoadV##type##ZeroLE(dst, operand);
// 32-bit load-and-zero: expands to LoadV32ZeroLE.
case kS390_S128Load32Zero: {
LOAD_AND_ZERO(32);
break;
}
// 64-bit load-and-zero: expands to LoadV64ZeroLE.
case kS390_S128Load64Zero: {
LOAD_AND_ZERO(64);
break;
}
// The helper is only meant for the two cases above.
#undef LOAD_AND_ZERO
case kS390_StoreCompressTagged: {
CHECK(!instr->HasOutput());
size_t index = 0;
......
......@@ -382,6 +382,8 @@ namespace compiler {
V(S390_S128Load16x4U) \
V(S390_S128Load32x2S) \
V(S390_S128Load32x2U) \
V(S390_S128Load32Zero) \
V(S390_S128Load64Zero) \
V(S390_StoreSimd128) \
V(S390_LoadSimd128) \
V(S390_StoreCompressTagged) \
......
......@@ -369,6 +369,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_S128Load16x4U:
case kS390_S128Load32x2S:
case kS390_S128Load32x2U:
case kS390_S128Load32Zero:
case kS390_S128Load64Zero:
return kIsLoadOperation;
case kS390_StoreWord8:
......
......@@ -2825,6 +2825,12 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
case LoadTransformation::kS128Load32x2U:
opcode = kS390_S128Load32x2U;
break;
case LoadTransformation::kS128Load32Zero:
opcode = kS390_S128Load32Zero;
break;
case LoadTransformation::kS128Load64Zero:
opcode = kS390_S128Load64Zero;
break;
default:
UNREACHABLE();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment