Commit efed4036 authored by Milad Fa, committed by Commit Bot

S390: [wasm-simd] Fix the simulator to correctly represent Simd lanes

Vector register lane numbers on IBM machines are reversed
compared to x64. For example, doing an I32x4 extract_lane with
lane number 0 on x64 will be equal to lane number 3 on IBM machines.
Vector registers are only used for compiling Wasm code at the moment.
Wasm also enforces little-endian byte order. On s390 native, we manually
reverse the bytes whenever values are loaded/stored between memory and
a Simd register. On the simulator however, we do not reverse the bytes
and data is just copied as is from one memory location to another
location which represents a register. To keep the Wasm simulation
accurate, we need to make sure accessing a lane is correctly simulated
and as such we reverse the lane number on the getters and setters.
We need to be careful when getting/setting values on the Low
or High side of a simulated register. In the simulation, "Low" is
equal to the MSB and "High" is equal to the LSB on memory.

As a result, many of the "#ifdef V8_TARGET_BIG_ENDIAN" blocks on
Simd opcodes are not needed anymore as we are now simulating
native behaviour.

Change-Id: Idfa80cdef7382febb4311c75eb6d3e1d110141fa
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2687756
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Reviewed-by: Joran Siu <joransiu@ca.ibm.com>
Reviewed-by: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#72642}
parent 2367a714
......@@ -2711,7 +2711,6 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
S390OperandGenerator g(this);
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
#ifdef V8_TARGET_BIG_ENDIAN
// Remap the shuffle indices to match IBM lane numbering.
int max_index = 15;
int total_lane_count = 2 * kSimd128Size;
......@@ -2723,7 +2722,6 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
: total_lane_count - current_index + max_index);
}
shuffle_p = &shuffle_remapped[0];
#endif
Emit(kS390_I8x16Shuffle, g.DefineAsRegister(node),
g.UseUniqueRegister(input0), g.UseUniqueRegister(input1),
g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_p)),
......
......@@ -3473,11 +3473,10 @@ EVALUATE(VPKLS) {
template <class S, class D>
void VectorUnpackHigh(Simulator* sim, int dst, int src) {
constexpr size_t kItemCount = kSimd128Size / sizeof(D);
D value = 0;
for (size_t i = 0; i < kItemCount; i++) {
value = sim->get_simd_register_by_lane<S>(src, i + kItemCount);
sim->set_simd_register_by_lane<D>(dst, i, value);
}
D temps[kItemCount] = {0};
// Read every lane into temps before writing any lane back, so that src is
// not overwritten mid-copy when src and dst are the same register.
FOR_EACH_LANE(i, D) { temps[i] = sim->get_simd_register_by_lane<S>(src, i); }
FOR_EACH_LANE(i, D) { sim->set_simd_register_by_lane<D>(dst, i, temps[i]); }
}
#define CASE(i, S, D) \
......@@ -3623,8 +3622,14 @@ void VectorUnpackLow(Simulator* sim, int dst, int src) {
constexpr size_t kItemCount = kSimd128Size / sizeof(D);
D temps[kItemCount] = {0};
// Read every lane into temps before writing any lane back, so that src is
// not overwritten mid-copy when src and dst are the same register.
FOR_EACH_LANE(i, D) { temps[i] = sim->get_simd_register_by_lane<S>(src, i); }
FOR_EACH_LANE(i, D) { sim->set_simd_register_by_lane<D>(dst, i, temps[i]); }
// Using the "false" argument here to make sure we use the "Low" side of the
// Simd register, being simulated by the LSB in memory.
FOR_EACH_LANE(i, D) {
temps[i] = sim->get_simd_register_by_lane<S>(src, i, false);
}
FOR_EACH_LANE(i, D) {
sim->set_simd_register_by_lane<D>(dst, i, temps[i], false);
}
}
#define CASE(i, S, D) \
......@@ -3871,6 +3876,7 @@ EVALUATE(VPERM) {
DECODE_VRR_E_INSTRUCTION(r1, r2, r3, r4, m6, m5);
USE(m5);
USE(m6);
int8_t temp[kSimd128Size] = {0};
for (int i = 0; i < kSimd128Size; i++) {
int8_t lane_num = get_simd_register_by_lane<int8_t>(r4, i);
// Get the five least significant bits.
......@@ -3880,8 +3886,10 @@ EVALUATE(VPERM) {
lane_num = lane_num - kSimd128Size;
reg = r3;
}
int8_t result = get_simd_register_by_lane<int8_t>(reg, lane_num);
set_simd_register_by_lane<int8_t>(r1, i, result);
temp[i] = get_simd_register_by_lane<int8_t>(reg, lane_num);
}
for (int i = 0; i < kSimd128Size; i++) {
set_simd_register_by_lane<int8_t>(r1, i, temp[i]);
}
return length;
}
......
......@@ -137,26 +137,21 @@ class Simulator : public SimulatorBase {
void set_high_register(int reg, uint32_t value);
double get_double_from_register_pair(int reg);
// Unlike Integer values, Floating Point values are located on the left most
// side of a native 64 bit register. As FP registers are a subset of vector
// registers, 64 and 32 bit FP values need to be located on the first lane
// (lane number 0) of a vector register.
template <class T>
T get_fpr(int dreg) {
DCHECK(dreg >= 0 && dreg < kNumFPRs);
if (sizeof(T) == 8) {
return get_simd_register_by_lane<T>(dreg, 0);
} else {
DCHECK_EQ(sizeof(T), 4);
return get_simd_register_by_lane<T>(dreg, 1);
}
}
template <class T>
void set_fpr(int dreg, const T val) {
DCHECK(dreg >= 0 && dreg < kNumFPRs);
if (sizeof(T) == 8) {
set_simd_register_by_lane(dreg, 0, val);
} else {
DCHECK_EQ(sizeof(T), 4);
set_simd_register_by_lane(dreg, 1, val);
}
set_simd_register_by_lane<T>(dreg, 0, val);
}
// Special case of set_register and get_register to access the raw PC value.
......@@ -412,8 +407,27 @@ class Simulator : public SimulatorBase {
set_simd_register_by_lane(reg, 0, v);
}
// Vector register lane numbers on IBM machines are reversed compared to
// x64. For example, doing an I32x4 extract_lane with lane number 0 on x64
// will be equal to lane number 3 on IBM machines. Vector registers are only
// used for compiling Wasm code at the moment. Wasm is also little endian
// enforced. On s390 native, we manually do a reverse byte whenever values are
// loaded/stored from memory to a Simd register. On the simulator however, we
// do not reverse the bytes and data is just copied as is from one memory
// location to another location which represents a register. To keep the Wasm
// simulation accurate, we need to make sure accessing a lane is correctly
// simulated and as such we reverse the lane number on the getters and setters
// below. We need to be careful when getting/setting values on the Low or High
// side of a simulated register. In the simulation, "Low" is equal to the MSB
// and "High" is equal to the LSB on memory. "force_ibm_lane_numbering" can
// be set to false to disable automatic lane number reversal and help with
// accessing the Low or High side of a simulated register.
template <class T>
T get_simd_register_by_lane(int reg, int lane) {
T get_simd_register_by_lane(int reg, int lane,
bool force_ibm_lane_numbering = true) {
if (force_ibm_lane_numbering) {
lane = (kSimd128Size / sizeof(T)) - 1 - lane;
}
CHECK_LE(lane, kSimd128Size / sizeof(T));
CHECK_LT(reg, kNumFPRs);
CHECK_GE(lane, 0);
......@@ -422,7 +436,11 @@ class Simulator : public SimulatorBase {
}
template <class T>
void set_simd_register_by_lane(int reg, int lane, const T& value) {
void set_simd_register_by_lane(int reg, int lane, const T& value,
bool force_ibm_lane_numbering = true) {
if (force_ibm_lane_numbering) {
lane = (kSimd128Size / sizeof(T)) - 1 - lane;
}
CHECK_LE(lane, kSimd128Size / sizeof(T));
CHECK_LT(reg, kNumFPRs);
CHECK_GE(lane, 0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment