Commit 4f21e4a0 authored by Milad Fa, committed by V8 LUCI CQ

S390 [simd]: Unify Load/Store transform ops on BE and LE

Checks if the machine is running on a native BE target and, if so,
tries to do the load/store ops using the new vector instructions
from z15.

If the target machine is older than a z15, the memory offset is larger
than 12 bits, or we are running in the simulator, then we fall back
to the scalar method of loading/storing from memory.
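
On a big-endian build every unified helper added here follows the same
shape; as a rough illustration (an approximate macro expansion of
LOAD_LANE for the 64-bit lane case, not literal source), the resulting
code looks like:

  void TurboAssembler::LoadLane64LE(Simd128Register dst,
                                    const MemOperand& mem, int lane) {
    // Fast path: native BE target with the z15 vector-enhancements
    // facility 2 and an offset that fits the 12-bit displacement field.
    if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
        is_uint12(mem.offset())) {
      vlebrg(dst, mem, Condition(lane));  // byte-reversed element load
      return;
    }
    // Fallback: scalar LE load into r1, then insert into the lane.
    LoadU64LE(r1, mem);
    vlvg(dst, r1, MemOperand(r0, lane), Condition(3));
  }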

Change-Id: Iec41ad21e7f292cf5b9c4707b4951a561f3dcda4
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3162247
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#76878}
parent 3554c2ad
@@ -3910,125 +3910,6 @@ void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
}
}
// Vector LE Load and Transform instructions.
void TurboAssembler::LoadAndSplat8x16LE(Simd128Register dst,
const MemOperand& mem) {
vlrep(dst, mem, Condition(0));
}
#define LOAD_SPLAT_LIST(V) \
V(64x2, LoadU64LE, 3) \
V(32x4, LoadU32LE, 2) \
V(16x8, LoadU16LE, 1)
#define LOAD_SPLAT(name, scalar_instr, condition) \
void TurboAssembler::LoadAndSplat##name##LE(Simd128Register dst, \
const MemOperand& mem) { \
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) && \
is_uint12(mem.offset())) { \
vlbrrep(dst, mem, Condition(condition)); \
return; \
} \
scalar_instr(r1, mem); \
vlvg(dst, r1, MemOperand(r0, 0), Condition(condition)); \
vrep(dst, dst, Operand(0), Condition(condition)); \
}
LOAD_SPLAT_LIST(LOAD_SPLAT)
#undef LOAD_SPLAT
#undef LOAD_SPLAT_LIST
#define LOAD_EXTEND_LIST(V) \
V(32x2U, vuplh, 2) \
V(32x2S, vuph, 2) \
V(16x4U, vuplh, 1) \
V(16x4S, vuph, 1) \
V(8x8U, vuplh, 0) \
V(8x8S, vuph, 0)
#define LOAD_EXTEND(name, unpack_instr, condition) \
void TurboAssembler::LoadAndExtend##name##LE(Simd128Register dst, \
const MemOperand& mem) { \
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) && \
is_uint12(mem.offset())) { \
vlebrg(kScratchDoubleReg, mem, Condition(0)); \
} else { \
LoadU64LE(r1, mem); \
vlvg(kScratchDoubleReg, r1, MemOperand(r0, 0), Condition(3)); \
} \
unpack_instr(dst, kScratchDoubleReg, Condition(0), Condition(0), \
Condition(condition)); \
}
LOAD_EXTEND_LIST(LOAD_EXTEND)
#undef LOAD_EXTEND
#undef LOAD_EXTEND_LIST
void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem) {
vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
vlebrf(dst, mem, Condition(3));
return;
}
LoadU32LE(r1, mem);
vlvg(dst, r1, MemOperand(r0, 3), Condition(2));
}
void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem) {
vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
vlebrg(dst, mem, Condition(1));
return;
}
LoadU64LE(r1, mem);
vlvg(dst, r1, MemOperand(r0, 1), Condition(3));
}
void TurboAssembler::LoadLane8LE(Simd128Register dst, const MemOperand& mem,
int index) {
vleb(dst, mem, Condition(index));
}
#define LOAD_LANE_LIST(V) \
V(64, vlebrg, LoadU64LE, 3) \
V(32, vlebrf, LoadU32LE, 2) \
V(16, vlebrh, LoadU16LE, 1)
#define LOAD_LANE(name, vector_instr, scalar_instr, condition) \
void TurboAssembler::LoadLane##name##LE(Simd128Register dst, \
const MemOperand& mem, int lane) { \
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) && \
is_uint12(mem.offset())) { \
vector_instr(dst, mem, Condition(lane)); \
return; \
} \
scalar_instr(r1, mem); \
vlvg(dst, r1, MemOperand(r0, lane), Condition(condition)); \
}
LOAD_LANE_LIST(LOAD_LANE)
#undef LOAD_LANE
#undef LOAD_LANE_LIST
void TurboAssembler::StoreLane8LE(Simd128Register src, const MemOperand& mem,
int index) {
vsteb(src, mem, Condition(index));
}
#define STORE_LANE_LIST(V) \
V(64, vstebrg, StoreU64LE, 3) \
V(32, vstebrf, StoreU32LE, 2) \
V(16, vstebrh, StoreU16LE, 1)
#define STORE_LANE(name, vector_instr, scalar_instr, condition) \
void TurboAssembler::StoreLane##name##LE(Simd128Register src, \
const MemOperand& mem, int lane) { \
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) && \
is_uint12(mem.offset())) { \
vector_instr(src, mem, Condition(lane)); \
return; \
} \
vlgv(r1, src, MemOperand(r0, lane), Condition(condition)); \
scalar_instr(r1, mem); \
}
STORE_LANE_LIST(STORE_LANE)
#undef STORE_LANE
#undef STORE_LANE_LIST
#else
void TurboAssembler::LoadU64LE(Register dst, const MemOperand& mem,
Register scratch) {
@@ -4101,83 +3982,6 @@ void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
StoreV128(src, mem, scratch1);
}
// Vector LE Load and Transform instructions.
#define LOAD_SPLAT_LIST(V) \
V(64x2, 3) \
V(32x4, 2) \
V(16x8, 1) \
V(8x16, 0)
#define LOAD_SPLAT(name, condition) \
void TurboAssembler::LoadAndSplat##name##LE(Simd128Register dst, \
const MemOperand& mem) { \
vlrep(dst, mem, Condition(condition)); \
}
LOAD_SPLAT_LIST(LOAD_SPLAT)
#undef LOAD_SPLAT
#undef LOAD_SPLAT_LIST
#define LOAD_EXTEND_LIST(V) \
V(32x2U, vuplh, 2) \
V(32x2S, vuph, 2) \
V(16x4U, vuplh, 1) \
V(16x4S, vuph, 1) \
V(8x8U, vuplh, 0) \
V(8x8S, vuph, 0)
#define LOAD_EXTEND(name, unpack_instr, condition) \
void TurboAssembler::LoadAndExtend##name##LE(Simd128Register dst, \
const MemOperand& mem) { \
vleg(kScratchDoubleReg, mem, Condition(0)); \
unpack_instr(dst, kScratchDoubleReg, Condition(0), Condition(0), \
Condition(condition)); \
}
LOAD_EXTEND_LIST(LOAD_EXTEND)
#undef LOAD_EXTEND
#undef LOAD_EXTEND_LIST
void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem) {
vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
vlef(dst, mem, Condition(3));
}
void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem) {
vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
vleg(dst, mem, Condition(1));
}
#define LOAD_LANE_LIST(V) \
V(64, vleg) \
V(32, vlef) \
V(16, vleh) \
V(8, vleb)
#define LOAD_LANE(name, vector_instr) \
void TurboAssembler::LoadLane##name##LE(Simd128Register dst, \
const MemOperand& mem, int lane) { \
DCHECK(is_uint12(mem.offset())); \
vector_instr(dst, mem, Condition(lane)); \
}
LOAD_LANE_LIST(LOAD_LANE)
#undef LOAD_LANE
#undef LOAD_LANE_LIST
#define STORE_LANE_LIST(V) \
V(64, vsteg) \
V(32, vstef) \
V(16, vsteh) \
V(8, vsteb)
#define STORE_LANE(name, vector_instr) \
void TurboAssembler::StoreLane##name##LE(Simd128Register src, \
const MemOperand& mem, int lane) { \
DCHECK(is_uint12(mem.offset())); \
vector_instr(src, mem, Condition(lane)); \
}
STORE_LANE_LIST(STORE_LANE)
#undef STORE_LANE
#undef STORE_LANE_LIST
#endif
// Load And Test (Reg <- Reg)
@@ -5604,6 +5408,123 @@ void TurboAssembler::I8x16GeU(Simd128Register dst, Simd128Register src1,
vo(dst, dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
}
// Vector LE Load and Transform instructions.
#ifdef V8_TARGET_BIG_ENDIAN
#define IS_BIG_ENDIAN true
#else
#define IS_BIG_ENDIAN false
#endif
#define CAN_LOAD_STORE_REVERSE \
  IS_BIG_ENDIAN && CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)
#define LOAD_SPLAT_LIST(V) \
V(64x2, vlbrrep, LoadU64LE, 3) \
V(32x4, vlbrrep, LoadU32LE, 2) \
V(16x8, vlbrrep, LoadU16LE, 1) \
V(8x16, vlrep, LoadU8, 0)
#define LOAD_SPLAT(name, vector_instr, scalar_instr, condition) \
void TurboAssembler::LoadAndSplat##name##LE(Simd128Register dst, \
const MemOperand& mem) { \
if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \
vector_instr(dst, mem, Condition(condition)); \
return; \
} \
scalar_instr(r1, mem); \
vlvg(dst, r1, MemOperand(r0, 0), Condition(condition)); \
vrep(dst, dst, Operand(0), Condition(condition)); \
}
LOAD_SPLAT_LIST(LOAD_SPLAT)
#undef LOAD_SPLAT
#undef LOAD_SPLAT_LIST
#define LOAD_EXTEND_LIST(V) \
V(32x2U, vuplh, 2) \
V(32x2S, vuph, 2) \
V(16x4U, vuplh, 1) \
V(16x4S, vuph, 1) \
V(8x8U, vuplh, 0) \
V(8x8S, vuph, 0)
#define LOAD_EXTEND(name, unpack_instr, condition) \
void TurboAssembler::LoadAndExtend##name##LE(Simd128Register dst, \
const MemOperand& mem) { \
if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \
vlebrg(kScratchDoubleReg, mem, Condition(0)); \
} else { \
LoadU64LE(r1, mem); \
vlvg(kScratchDoubleReg, r1, MemOperand(r0, 0), Condition(3)); \
} \
unpack_instr(dst, kScratchDoubleReg, Condition(0), Condition(0), \
Condition(condition)); \
}
LOAD_EXTEND_LIST(LOAD_EXTEND)
#undef LOAD_EXTEND
#undef LOAD_EXTEND_LIST
void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem) {
vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {
vlebrf(dst, mem, Condition(3));
return;
}
LoadU32LE(r1, mem);
vlvg(dst, r1, MemOperand(r0, 3), Condition(2));
}
void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem) {
vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {
vlebrg(dst, mem, Condition(1));
return;
}
LoadU64LE(r1, mem);
vlvg(dst, r1, MemOperand(r0, 1), Condition(3));
}
#define LOAD_LANE_LIST(V) \
V(64, vlebrg, LoadU64LE, 3) \
V(32, vlebrf, LoadU32LE, 2) \
V(16, vlebrh, LoadU16LE, 1) \
V(8, vleb, LoadU8, 0)
#define LOAD_LANE(name, vector_instr, scalar_instr, condition) \
void TurboAssembler::LoadLane##name##LE(Simd128Register dst, \
const MemOperand& mem, int lane) { \
if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \
vector_instr(dst, mem, Condition(lane)); \
return; \
} \
scalar_instr(r1, mem); \
vlvg(dst, r1, MemOperand(r0, lane), Condition(condition)); \
}
LOAD_LANE_LIST(LOAD_LANE)
#undef LOAD_LANE
#undef LOAD_LANE_LIST
#define STORE_LANE_LIST(V) \
V(64, vstebrg, StoreU64LE, 3) \
V(32, vstebrf, StoreU32LE, 2) \
V(16, vstebrh, StoreU16LE, 1) \
V(8, vsteb, StoreU8, 0)
#define STORE_LANE(name, vector_instr, scalar_instr, condition) \
void TurboAssembler::StoreLane##name##LE(Simd128Register src, \
const MemOperand& mem, int lane) { \
if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \
vector_instr(src, mem, Condition(lane)); \
return; \
} \
vlgv(r1, src, MemOperand(r0, lane), Condition(condition)); \
scalar_instr(r1, mem); \
}
STORE_LANE_LIST(STORE_LANE)
#undef STORE_LANE
#undef STORE_LANE_LIST
#undef CAN_LOAD_STORE_REVERSE
#undef IS_BIG_ENDIAN
#undef kScratchDoubleReg
} // namespace internal