Commit 1b2e0ddf authored by Milad Farazmand, committed by Commit Bot

s390: [wasm-simd] Add simulator support for simd operations


Change-Id: I0352ef9e4213d6dc0f50a5406d8e167784408452
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2095755
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Reviewed-by: Joran Siu <joransiu@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#66731}
parent 371580bb
......@@ -2333,6 +2333,13 @@ class VRR_E_Instruction : SixByteInstruction {
DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M5Value, uint32_t, 28, 32)
};
class VRR_F_Instruction : SixByteInstruction {
public:
DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R1Value, int, 8, 12)
DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R2Value, int, 12, 16)
DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R3Value, int, 16, 20)
};
class VRX_Instruction : SixByteInstruction {
public:
DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R1Value, int, 8, 12)
......
......@@ -2941,44 +2941,84 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
// vector extract element
case kS390_F64x2ExtractLane: {
#ifdef V8_TARGET_BIG_ENDIAN
__ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
Operand(1 - i.InputInt8(1)), Condition(3));
#else
__ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
Operand(i.InputInt8(1)), Condition(3));
#endif
break;
}
case kS390_F32x4ExtractLane: {
#ifdef V8_TARGET_BIG_ENDIAN
__ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
Operand(3 - i.InputInt8(1)), Condition(2));
#else
__ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
Operand(i.InputInt8(1)), Condition(2));
#endif
break;
}
case kS390_I64x2ExtractLane: {
#ifdef V8_TARGET_BIG_ENDIAN
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
MemOperand(r0, 1 - i.InputInt8(1)), Condition(3));
#else
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
MemOperand(r0, i.InputInt8(1)), Condition(3));
#endif
break;
}
case kS390_I32x4ExtractLane: {
#ifdef V8_TARGET_BIG_ENDIAN
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
MemOperand(r0, 3 - i.InputInt8(1)), Condition(2));
#else
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
MemOperand(r0, i.InputInt8(1)), Condition(2));
#endif
break;
}
case kS390_I16x8ExtractLaneU: {
#ifdef V8_TARGET_BIG_ENDIAN
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
MemOperand(r0, 7 - i.InputInt8(1)), Condition(1));
#else
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
MemOperand(r0, i.InputInt8(1)), Condition(1));
#endif
break;
}
case kS390_I16x8ExtractLaneS: {
#ifdef V8_TARGET_BIG_ENDIAN
__ vlgv(kScratchReg, i.InputSimd128Register(0),
MemOperand(r0, 7 - i.InputInt8(1)), Condition(1));
#else
__ vlgv(kScratchReg, i.InputSimd128Register(0),
MemOperand(r0, i.InputInt8(1)), Condition(1));
#endif
__ lghr(i.OutputRegister(), kScratchReg);
break;
}
case kS390_I8x16ExtractLaneU: {
#ifdef V8_TARGET_BIG_ENDIAN
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
MemOperand(r0, 15 - i.InputInt8(1)), Condition(0));
#else
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
MemOperand(r0, i.InputInt8(1)), Condition(0));
#endif
break;
}
case kS390_I8x16ExtractLaneS: {
#ifdef V8_TARGET_BIG_ENDIAN
__ vlgv(kScratchReg, i.InputSimd128Register(0),
MemOperand(r0, 15 - i.InputInt8(1)), Condition(0));
#else
__ vlgv(kScratchReg, i.InputSimd128Register(0),
MemOperand(r0, i.InputInt8(1)), Condition(0));
#endif
__ lgbr(i.OutputRegister(), kScratchReg);
break;
}
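Editorial note, not part of the patch: every big-endian branch in the extract-lane cases above applies the same index remapping, since WebAssembly numbers lanes starting from the low-order end while the s390 vector element numbering runs the other way on big-endian targets. A minimal standalone sketch of that mapping, using a hypothetical helper name:

// Hypothetical helper: lanes_per_vector is 2, 4, 8 or 16 for a 128-bit vector.
constexpr int RemapLaneForBigEndian(int lanes_per_vector, int lane) {
  return lanes_per_vector - 1 - lane;
}

static_assert(RemapLaneForBigEndian(2, 1) == 0, "F64x2/I64x2 lane 1 -> element 0");
static_assert(RemapLaneForBigEndian(4, 0) == 3, "F32x4/I32x4 lane 0 -> element 3");
static_assert(RemapLaneForBigEndian(16, 15) == 0, "I8x16 lane 15 -> element 0");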
......@@ -2989,8 +3029,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
__ vlgv(kScratchReg, i.InputDoubleRegister(2), MemOperand(r0, 0),
Condition(3));
#ifdef V8_TARGET_BIG_ENDIAN
__ vlvg(kScratchDoubleReg, kScratchReg,
MemOperand(r0, 1 - i.InputInt8(1)), Condition(3));
#else
__ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, i.InputInt8(1)),
Condition(3));
#endif
__ vlr(dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
break;
}
......@@ -2998,10 +3043,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
__ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
#ifdef V8_TARGET_BIG_ENDIAN
__ vlgv(kScratchReg, i.InputDoubleRegister(2), MemOperand(r0, 0),
Condition(2));
__ vlvg(kScratchDoubleReg, kScratchReg,
MemOperand(r0, 3 - i.InputInt8(1)), Condition(2));
#else
__ vlgv(kScratchReg, i.InputDoubleRegister(2), MemOperand(r0, 1),
Condition(2));
__ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, i.InputInt8(1)),
Condition(2));
#endif
__ vlr(dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
break;
}
......@@ -3011,8 +3063,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (src != dst) {
__ vlr(dst, src, Condition(0), Condition(0), Condition(0));
}
#ifdef V8_TARGET_BIG_ENDIAN
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, 1 - i.InputInt8(1)), Condition(3));
#else
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, i.InputInt8(1)), Condition(3));
#endif
break;
}
case kS390_I32x4ReplaceLane: {
......@@ -3021,8 +3078,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (src != dst) {
__ vlr(dst, src, Condition(0), Condition(0), Condition(0));
}
#ifdef V8_TARGET_BIG_ENDIAN
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, 3 - i.InputInt8(1)), Condition(2));
#else
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, i.InputInt8(1)), Condition(2));
#endif
break;
}
case kS390_I16x8ReplaceLane: {
......@@ -3031,8 +3093,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (src != dst) {
__ vlr(dst, src, Condition(0), Condition(0), Condition(0));
}
#ifdef V8_TARGET_BIG_ENDIAN
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, 7 - i.InputInt8(1)), Condition(1));
#else
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, i.InputInt8(1)), Condition(1));
#endif
break;
}
case kS390_I8x16ReplaceLane: {
......@@ -3041,8 +3108,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (src != dst) {
__ vlr(dst, src, Condition(0), Condition(0), Condition(0));
}
#ifdef V8_TARGET_BIG_ENDIAN
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, 15 - i.InputInt8(1)), Condition(0));
#else
__ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
MemOperand(r0, i.InputInt8(1)), Condition(0));
#endif
break;
}
// vector binops
......@@ -3104,6 +3176,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(2));
break;
}
#define FLOAT_ADD_HORIZ(src0, src1, scratch0, scratch1, add0, add1) \
__ vpk(dst, src0, src1, Condition(0), Condition(0), Condition(3)); \
__ vesrl(scratch0, src0, MemOperand(r0, shift_bits), Condition(3)); \
__ vesrl(scratch1, src1, MemOperand(r0, shift_bits), Condition(3)); \
__ vpk(kScratchDoubleReg, scratch0, scratch1, Condition(0), Condition(0), \
Condition(3)); \
__ vfa(dst, add0, add1, Condition(0), Condition(0), Condition(2));
case kS390_F32x4AddHoriz: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register src1 = i.InputSimd128Register(1);
......@@ -3111,16 +3190,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DoubleRegister tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
DoubleRegister tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
constexpr int shift_bits = 32;
// generate first operand
__ vpk(dst, src1, src0, Condition(0), Condition(0), Condition(3));
// generate second operand
__ vesrl(tempFPReg1, src0, MemOperand(r0, shift_bits), Condition(3));
__ vesrl(tempFPReg2, src1, MemOperand(r0, shift_bits), Condition(3));
__ vpk(kScratchDoubleReg, tempFPReg2, tempFPReg1, Condition(0),
Condition(0), Condition(3));
// add the operands
__ vfa(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),
Condition(2));
#ifdef V8_TARGET_BIG_ENDIAN
FLOAT_ADD_HORIZ(src1, src0, tempFPReg2, tempFPReg1, kScratchDoubleReg,
dst)
#else
FLOAT_ADD_HORIZ(src0, src1, tempFPReg1, tempFPReg2, dst,
kScratchDoubleReg)
#endif
#undef FLOAT_ADD_HORIZ
break;
}
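For reference, and as an assumption about the intended f32x4 horizontal-add semantics rather than code from this change: the pack/shift sequence above arranges the vectors so each output lane receives the sum of one adjacent input pair. A scalar sketch:

#include <array>

// Scalar reference: result lanes are [a0+a1, a2+a3, b0+b1, b2+b3].
std::array<float, 4> F32x4AddHorizRef(const std::array<float, 4>& a,
                                      const std::array<float, 4>& b) {
  return {a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]};
}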
case kS390_F32x4Sub: {
......@@ -3212,8 +3289,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(2));
__ vsumg(kScratchDoubleReg, src1, kScratchDoubleReg, Condition(0),
Condition(0), Condition(2));
#ifdef V8_TARGET_BIG_ENDIAN
__ vpk(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),
Condition(3));
#else
__ vpk(dst, dst, kScratchDoubleReg, Condition(0), Condition(0),
Condition(3));
#endif
break;
}
case kS390_I32x4Sub: {
......@@ -3244,8 +3326,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(1));
__ vsum(kScratchDoubleReg, src1, kScratchDoubleReg, Condition(0),
Condition(0), Condition(1));
#ifdef V8_TARGET_BIG_ENDIAN
__ vpk(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),
Condition(2));
#else
__ vpk(dst, dst, kScratchDoubleReg, Condition(0), Condition(0),
Condition(2));
#endif
break;
}
case kS390_I16x8Sub: {
......@@ -3710,7 +3797,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kS390_F32x4RecipApprox: {
__ lgfi(kScratchReg, Operand(1));
__ ConvertIntToFloat(kScratchDoubleReg, kScratchReg);
#ifdef V8_TARGET_BIG_ENDIAN
__ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(0), Condition(2));
#else
__ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(1), Condition(2));
#endif
__ vfd(i.OutputSimd128Register(), kScratchDoubleReg,
i.InputSimd128Register(0), Condition(0), Condition(0),
Condition(2));
......@@ -3722,7 +3813,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(2));
__ lgfi(kScratchReg, Operand(1));
__ ConvertIntToFloat(kScratchDoubleReg, kScratchReg);
#ifdef V8_TARGET_BIG_ENDIAN
__ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(0), Condition(2));
#else
__ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(1), Condition(2));
#endif
__ vfd(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(0), Condition(2));
break;
......@@ -3841,17 +3936,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
// vector conversions
#define CONVERT_FLOAT_TO_INT32(convert) \
for (int index = 0; index < 4; index++) { \
__ vlgv(kScratchReg, kScratchDoubleReg, MemOperand(r0, index), \
Condition(2)); \
__ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 0), Condition(2)); \
__ convert(kScratchReg, kScratchDoubleReg, kRoundToZero); \
__ vlvg(dst, kScratchReg, MemOperand(r0, index), Condition(2)); \
#define CONVERT_FLOAT_TO_INT32(convert) \
for (int index = 0; index < 4; index++) { \
__ vlgv(kScratchReg, kScratchDoubleReg, MemOperand(r0, index), \
Condition(2)); \
__ MovIntToFloat(tempFPReg1, kScratchReg); \
__ convert(kScratchReg, tempFPReg1, kRoundToZero); \
__ vlvg(dst, kScratchReg, MemOperand(r0, index), Condition(2)); \
}
case kS390_I32x4SConvertF32x4: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
// NaN to 0
__ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
__ vfce(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
......@@ -3864,6 +3960,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kS390_I32x4UConvertF32x4: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
// NaN to 0, negative to 0
__ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(0));
......@@ -3873,21 +3970,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
#undef CONVERT_FLOAT_TO_INT32
#define CONVERT_INT32_TO_FLOAT(convert) \
Simd128Register src = i.InputSimd128Register(0); \
Simd128Register dst = i.OutputSimd128Register(); \
for (int index = 0; index < 4; index++) { \
__ vlgv(kScratchReg, src, MemOperand(r0, index), Condition(2)); \
__ convert(kScratchDoubleReg, kScratchReg); \
__ vlgv(kScratchReg, kScratchDoubleReg, MemOperand(r0, 0), Condition(2)); \
__ vlvg(dst, kScratchReg, MemOperand(r0, index), Condition(2)); \
#define CONVERT_INT32_TO_FLOAT(convert, double_index) \
Simd128Register src = i.InputSimd128Register(0); \
Simd128Register dst = i.OutputSimd128Register(); \
for (int index = 0; index < 4; index++) { \
__ vlgv(kScratchReg, src, MemOperand(r0, index), Condition(2)); \
__ convert(kScratchDoubleReg, kScratchReg); \
__ MovFloatToInt(kScratchReg, kScratchDoubleReg); \
__ vlvg(dst, kScratchReg, MemOperand(r0, index), Condition(2)); \
}
case kS390_F32x4SConvertI32x4: {
CONVERT_INT32_TO_FLOAT(ConvertIntToFloat)
#ifdef V8_TARGET_BIG_ENDIAN
CONVERT_INT32_TO_FLOAT(ConvertIntToFloat, 0)
#else
CONVERT_INT32_TO_FLOAT(ConvertIntToFloat, 1)
#endif
break;
}
case kS390_F32x4UConvertI32x4: {
CONVERT_INT32_TO_FLOAT(ConvertUnsignedIntToFloat)
#ifdef V8_TARGET_BIG_ENDIAN
CONVERT_INT32_TO_FLOAT(ConvertUnsignedIntToFloat, 0)
#else
CONVERT_INT32_TO_FLOAT(ConvertUnsignedIntToFloat, 1)
#endif
break;
}
#undef CONVERT_INT32_TO_FLOAT
......@@ -4044,8 +4149,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputInt32(4), i.InputInt32(5)};
// create 2 * 8 byte inputs indicating new indices
for (int i = 0, j = 0; i < 2; i++, j += 2) {
#ifdef V8_TARGET_BIG_ENDIAN
__ lgfi(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1]));
__ aih(i < 1 ? ip : r0, Operand(k8x16_indices[j]));
#else
__ lgfi(i < 1 ? ip : r0, Operand(k8x16_indices[j]));
__ aih(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1]));
#endif
}
__ vlvgp(kScratchDoubleReg, ip, r0);
__ vperm(dst, src0, src1, kScratchDoubleReg, Condition(0), Condition(0));
......@@ -4055,6 +4165,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
#ifdef V8_TARGET_BIG_ENDIAN
// input needs to be reversed
__ vlgv(r0, src0, MemOperand(r0, 0), Condition(3));
__ vlgv(r1, src0, MemOperand(r0, 1), Condition(3));
......@@ -4064,6 +4175,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// clear src0
__ vx(src0, src0, src0, Condition(0), Condition(0), Condition(0));
__ vperm(dst, kScratchDoubleReg, src0, src1, Condition(0), Condition(0));
#else
__ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(0));
__ vperm(dst, src0, kScratchDoubleReg, src1, Condition(0), Condition(0));
#endif
break;
}
default:
......
......@@ -2750,11 +2750,12 @@ SIMD_BOOL_LIST(SIMD_VISIT_BOOL)
#undef SIMD_VISIT_BOOL
#undef SIMD_BOOL_LIST
#define SIMD_VISIT_CONVERSION(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
S390OperandGenerator g(this); \
Emit(kS390_##Opcode, g.DefineAsRegister(node), \
g.UseRegister(node->InputAt(0))); \
#define SIMD_VISIT_CONVERSION(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \
S390OperandGenerator g(this); \
InstructionOperand temps[] = {g.TempSimd128Register()}; \
Emit(kS390_##Opcode, g.DefineAsRegister(node), \
g.UseRegister(node->InputAt(0)), arraysize(temps), temps); \
}
SIMD_CONVERSION_LIST(SIMD_VISIT_CONVERSION)
#undef SIMD_VISIT_CONVERSION
......@@ -2782,6 +2783,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
S390OperandGenerator g(this);
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
#ifdef V8_TARGET_BIG_ENDIAN
// input registers are each in reverse order, so we have to remap the
// shuffle indices
int max_index = 15;
......@@ -2801,12 +2803,21 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
g.UseImmediate(Pack4Lanes(shuffle_remapped + 8)),
g.UseImmediate(Pack4Lanes(shuffle_remapped + 4)),
g.UseImmediate(Pack4Lanes(shuffle_remapped)));
#else
Emit(kS390_S8x16Shuffle, g.DefineAsRegister(node),
g.UseUniqueRegister(input0), g.UseUniqueRegister(input1),
g.UseImmediate(Pack4Lanes(shuffle)),
g.UseImmediate(Pack4Lanes(shuffle + 4)),
g.UseImmediate(Pack4Lanes(shuffle + 8)),
g.UseImmediate(Pack4Lanes(shuffle + 12)));
#endif
}
void InstructionSelector::VisitS8x16Swizzle(Node* node) {
S390OperandGenerator g(this);
Emit(kS390_S8x16Swizzle, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)));
}
void InstructionSelector::VisitS128Zero(Node* node) {
......
......@@ -747,17 +747,21 @@ void Simulator::EvalTableInit() {
#define S390_SUPPORTED_VECTOR_OPCODE_LIST(V) \
V(vst, VST, 0xE70E) /* type = VRX VECTOR STORE */ \
V(vl, VL, 0xE706) /* type = VRX VECTOR LOAD */ \
V(vlp, VLP, 0xE7DF) /* type = VRR_A VECTOR LOAD POSITIVE */ \
V(vlgv, VLGV, 0xE721) /* type = VRS_C VECTOR LOAD GR FROM VR ELEMENT */ \
V(vlvg, VLVG, 0xE722) /* type = VRS_B VECTOR LOAD VR ELEMENT FROM GR */ \
V(vlvgp, VLVGP, 0xE762) /* type = VRR_F VECTOR LOAD VR FROM GRS DISJOINT */ \
V(vrep, VREP, 0xE74D) /* type = VRI_C VECTOR REPLICATE */ \
V(vlrep, VLREP, 0xE705) /* type = VRX VECTOR LOAD AND REPLICATE */ \
V(vrepi, VREPI, 0xE745) /* type = VRI_A VECTOR REPLICATE IMMEDIATE */ \
V(vlr, VLR, 0xE756) /* type = VRR_A VECTOR LOAD */ \
V(vstef, VSTEF, 0xE70B) /* type = VRX VECTOR STORE ELEMENT (32) */ \
V(vlef, VLEF, 0xE703) /* type = VRX VECTOR LOAD ELEMENT (32) */ \
V(vavgl, VAVGL, 0xE7F0) /* type = VRR_C VECTOR AVERAGE LOGICAL */ \
V(va, VA, 0xE7F3) /* type = VRR_C VECTOR ADD */ \
V(vs, VS, 0xE7F7) /* type = VRR_C VECTOR SUBTRACT */ \
V(vml, VML, 0xE7A2) /* type = VRR_C VECTOR MULTIPLY LOW */ \
V(vnc, VNC, 0xE769) /* type = VRR_C VECTOR AND WITH COMPLEMENT */ \
V(vsum, VSUM, 0xE764) /* type = VRR_C VECTOR SUM ACROSS WORD */ \
V(vsumg, VSUMG, 0xE765) /* type = VRR_C VECTOR SUM ACROSS DOUBLEWORD */ \
V(vpk, VPK, 0xE794) /* type = VRR_C VECTOR PACK */ \
......@@ -777,14 +781,21 @@ void Simulator::EvalTableInit() {
V(vch, VCH, 0xE7FB) /* type = VRR_B VECTOR COMPARE HIGH */ \
V(vo, VO, 0xE76A) /* type = VRR_C VECTOR OR */ \
V(vn, VN, 0xE768) /* type = VRR_C VECTOR AND */ \
V(vno, VNO, 0xE76B) /* type = VRR_C VECTOR NOR */ \
V(vlc, VLC, 0xE7DE) /* type = VRR_A VECTOR LOAD COMPLEMENT */ \
V(vsel, VSEL, 0xE78D) /* type = VRR_E VECTOR SELECT */ \
V(vperm, VPERM, 0xE78C) /* type = VRR_E VECTOR PERMUTE */ \
V(vtm, VTM, 0xE7D8) /* type = VRR_A VECTOR TEST UNDER MASK */ \
V(vesl, VESL, 0xE730) /* type = VRS_A VECTOR ELEMENT SHIFT LEFT */ \
V(veslv, VESLV, 0xE770) /* type = VRR_C VECTOR ELEMENT SHIFT LEFT */ \
V(vesrl, VESRL, \
0xE738) /* type = VRS_A VECTOR ELEMENT SHIFT RIGHT LOGICAL */ \
V(vesrlv, VESRLV, \
0xE778) /* type = VRR_C VECTOR ELEMENT SHIFT RIGHT LOGICAL */ \
V(vesra, VESRA, \
0xE73A) /* type = VRS_A VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ \
V(vesrav, VESRAV, \
0xE77A) /* type = VRR_C VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ \
V(vfsq, VFSQ, 0xE7CE) /* type = VRR_A VECTOR FP SQUARE ROOT */ \
V(vfmax, VFMAX, 0xE7EF) /* type = VRR_C VECTOR FP MAXIMUM */ \
V(vfmin, VFMIN, 0xE7EE) /* type = VRR_C VECTOR FP MINIMUM */ \
......@@ -796,7 +807,10 @@ void Simulator::EvalTableInit() {
V(vfs, VFS, 0xE7E2) /* type = VRR_C VECTOR FP SUBTRACT */ \
V(vfa, VFA, 0xE7E3) /* type = VRR_C VECTOR FP ADD */ \
V(vfd, VFD, 0xE7E5) /* type = VRR_C VECTOR FP DIVIDE */ \
V(vfm, VFM, 0xE7E7) /* type = VRR_C VECTOR FP MULTIPLY */
V(vfm, VFM, 0xE7E7) /* type = VRR_C VECTOR FP MULTIPLY */ \
V(vfma, VFMA, 0xE78F) /* type = VRR_E VECTOR FP MULTIPLY AND ADD */ \
V(vfnms, VFNMS, \
0xE79E) /* type = VRR_E VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT */
#define CREATE_EVALUATE_TABLE(name, op_name, op_value) \
EvalTable[op_name] = &Simulator::Evaluate_##op_name;
......@@ -2870,6 +2884,12 @@ uintptr_t Simulator::PopAddress() {
int m5 = AS(VRR_E_Instruction)->M5Value(); \
int length = 6;
#define DECODE_VRR_F_INSTRUCTION(r1, r2, r3) \
int r1 = AS(VRR_F_Instruction)->R1Value(); \
int r2 = AS(VRR_F_Instruction)->R2Value(); \
int r3 = AS(VRR_F_Instruction)->R3Value(); \
int length = 6;
#define DECODE_VRX_INSTRUCTION(r1, x2, b2, d2, m3) \
int r1 = AS(VRX_Instruction)->R1Value(); \
int x2 = AS(VRX_Instruction)->X2Value(); \
......@@ -2927,6 +2947,78 @@ EVALUATE(VL) {
return length;
}
#define VECTOR_LOAD_POSITIVE(r1, r2, type) \
for (size_t i = 0, j = 0; j < kSimd128Size; i++, j += sizeof(type)) { \
set_simd_register_by_lane<type>( \
r1, i, std::abs(get_simd_register_by_lane<type>(r2, i))); \
}
EVALUATE(VLP) {
DCHECK_OPCODE(VLP);
DECODE_VRR_A_INSTRUCTION(r1, r2, m5, m4, m3);
USE(m5);
USE(m4);
switch (m3) {
case 0: {
VECTOR_LOAD_POSITIVE(r1, r2, int8_t)
break;
}
case 1: {
VECTOR_LOAD_POSITIVE(r1, r2, int16_t)
break;
}
case 2: {
VECTOR_LOAD_POSITIVE(r1, r2, int32_t)
break;
}
case 3: {
VECTOR_LOAD_POSITIVE(r1, r2, int64_t)
break;
}
default:
UNREACHABLE();
}
return length;
}
#undef VECTOR_LOAD_POSITIVE
#define VECTOR_AVERAGE_U(r1, r2, r3, type) \
for (size_t i = 0, j = 0; j < kSimd128Size; i++, j += sizeof(type)) { \
type src0 = get_simd_register_by_lane<type>(r2, i); \
type src1 = get_simd_register_by_lane<type>(r3, i); \
set_simd_register_by_lane<type>( \
r1, i, (static_cast<type>(src0) + static_cast<type>(src1) + 1) >> 1); \
}
EVALUATE(VAVGL) {
DCHECK_OPCODE(VAVGL);
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m6);
USE(m5);
switch (m4) {
case 0: {
VECTOR_AVERAGE_U(r1, r2, r3, uint8_t)
break;
}
case 1: {
VECTOR_AVERAGE_U(r1, r2, r3, uint16_t)
break;
}
case 2: {
VECTOR_AVERAGE_U(r1, r2, r3, uint32_t)
break;
}
case 3: {
VECTOR_AVERAGE_U(r1, r2, r3, uint64_t)
break;
}
default:
UNREACHABLE();
}
return length;
}
#undef VECTOR_AVERAGE_U
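Editorial sketch, not part of the patch: per lane, the VAVGL handler above computes the rounding average (a + b + 1) >> 1. Widening first makes the intent explicit for the 8-bit case:

#include <cstdint>

// Scalar form of the rounding average applied to each unsigned lane.
uint8_t RoundingAverageU8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((static_cast<uint16_t>(a) + b + 1) >> 1);
}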
EVALUATE(VLGV) {
DCHECK_OPCODE(VLGV);
DECODE_VRS_INSTRUCTION(r1, r3, b2, d2, m4);
......@@ -2950,6 +3042,14 @@ EVALUATE(VLVG) {
return length;
}
EVALUATE(VLVGP) {
DCHECK_OPCODE(VLVGP);
DECODE_VRR_F_INSTRUCTION(r1, r2, r3);
set_simd_register_by_lane<int64_t>(r1, 0, get_register(r2));
set_simd_register_by_lane<int64_t>(r1, 1, get_register(r3));
return length;
}
EVALUATE(VREP) {
DCHECK_OPCODE(VREP);
DECODE_VRI_C_INSTRUCTION(r1, r3, i2, m4);
......@@ -3084,6 +3184,20 @@ EVALUATE(VML) {
return length;
}
EVALUATE(VNC) {
DCHECK_OPCODE(VNC);
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m6);
USE(m5);
USE(m4);
for (int i = 0; i < 2; i++) {
uint64_t lane_1 = get_simd_register_by_lane<uint64_t>(r2, i);
uint64_t lane_2 = get_simd_register_by_lane<uint64_t>(r3, i);
set_simd_register_by_lane<uint64_t>(r1, i, lane_1 & ~lane_2);
}
return length;
}
template <class S, class D>
void VectorSum(void* dst, void* src1, void* src2) {
D value = 0;
......@@ -3490,6 +3604,42 @@ EVALUATE(VX) {
return length;
}
#define VECTOR_NOR(r1, r2, r3, type) \
for (size_t i = 0, j = 0; j < kSimd128Size; i++, j += sizeof(type)) { \
type src0 = get_simd_register_by_lane<type>(r2, i); \
type src1 = get_simd_register_by_lane<type>(r3, i); \
set_simd_register_by_lane<type>(r1, i, ~(src0 | src1)); \
}
EVALUATE(VNO) {
DCHECK_OPCODE(VNO);
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m6);
USE(m5);
switch (m4) {
case 0: {
VECTOR_NOR(r1, r2, r3, int8_t)
break;
}
case 1: {
VECTOR_NOR(r1, r2, r3, int16_t)
break;
}
case 2: {
VECTOR_NOR(r1, r2, r3, int32_t)
break;
}
case 3: {
VECTOR_NOR(r1, r2, r3, int64_t)
break;
}
default:
UNREACHABLE();
}
return length;
}
#undef VECTOR_NOR
template <class T>
void VectorLoadComplement(void* dst, void* src) {
int8_t* src_ptr = reinterpret_cast<int8_t*>(src);
......@@ -3530,6 +3680,27 @@ EVALUATE(VLC) {
return length;
}
EVALUATE(VPERM) {
DCHECK_OPCODE(VPERM);
DECODE_VRR_E_INSTRUCTION(r1, r2, r3, r4, m6, m5);
USE(m5);
USE(m6);
for (int i = 0; i < kSimd128Size; i++) {
int8_t lane_num = get_simd_register_by_lane<int8_t>(r4, i);
int reg = r2;
if (lane_num >= kSimd128Size) {
lane_num = lane_num - kSimd128Size;
reg = r3;
}
int8_t result = 0;
if (lane_num >= 0 && lane_num < kSimd128Size * 2) {
result = get_simd_register_by_lane<int8_t>(reg, lane_num);
}
set_simd_register_by_lane<int8_t>(r1, i, result);
}
return length;
}
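A scalar restatement of the VPERM loop above (editorial, names are illustrative): each result byte selects from the 32-byte concatenation of the two sources, and any index the handler treats as out of range yields zero.

#include <array>
#include <cstdint>

// Reference: idx[i] in [0,15] picks a byte of a, [16,31] picks a byte of b,
// anything else yields 0, matching the handler above.
std::array<uint8_t, 16> VPermRef(const std::array<uint8_t, 16>& a,
                                 const std::array<uint8_t, 16>& b,
                                 const std::array<uint8_t, 16>& idx) {
  std::array<uint8_t, 16> out{};
  for (int i = 0; i < 16; i++) {
    const int k = idx[i];
    out[i] = static_cast<uint8_t>((k < 16) ? a[k] : (k < 32) ? b[k - 16] : 0);
  }
  return out;
}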
EVALUATE(VSEL) {
DCHECK_OPCODE(VSEL);
DECODE_VRR_E_INSTRUCTION(r1, r2, r3, r4, m6, m5);
......@@ -3606,6 +3777,65 @@ EVALUATE(VESRL) {
return length;
}
#define VECTOR_SHIFT_WITH_OPERAND_TYPE(r1, r2, r3, type, op) \
for (size_t i = 0, j = 0; j < kSimd128Size; i++, j += sizeof(type)) { \
type src0 = get_simd_register_by_lane<type>(r2, i); \
type src1 = get_simd_register_by_lane<type>(r3, i); \
set_simd_register_by_lane<type>(r1, i, \
src0 op(src1 % (sizeof(type) * 8))); \
}
#define VECTOR_SHIFT_WITH_OPERAND(r1, r2, r3, op, sign) \
switch (m4) { \
case 0: { \
VECTOR_SHIFT_WITH_OPERAND_TYPE(r1, r2, r3, sign##int8_t, op) \
break; \
} \
case 1: { \
VECTOR_SHIFT_WITH_OPERAND_TYPE(r1, r2, r3, sign##int16_t, op) \
break; \
} \
case 2: { \
VECTOR_SHIFT_WITH_OPERAND_TYPE(r1, r2, r3, sign##int32_t, op) \
break; \
} \
case 3: { \
VECTOR_SHIFT_WITH_OPERAND_TYPE(r1, r2, r3, sign##int64_t, op) \
break; \
} \
default: \
UNREACHABLE(); \
}
EVALUATE(VESLV) {
DCHECK_OPCODE(VESLV);
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m6);
USE(m5);
VECTOR_SHIFT_WITH_OPERAND(r1, r2, r3, <<, )
return length;
}
EVALUATE(VESRAV) {
DCHECK_OPCODE(VESRAV);
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m6);
USE(m5);
VECTOR_SHIFT_WITH_OPERAND(r1, r2, r3, >>, )
return length;
}
EVALUATE(VESRLV) {
DCHECK_OPCODE(VESRLV);
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m6);
USE(m5);
VECTOR_SHIFT_WITH_OPERAND(r1, r2, r3, >>, u)
return length;
}
#undef VECTOR_SHIFT_WITH_OPERAND
#undef VECTOR_SHIFT_WITH_OPERAND_TYPE
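The variable-shift handlers above (VESLV, VESRLV, VESRAV) reduce each lane's shift count modulo the element width before shifting. A minimal scalar sketch of one 32-bit lane (editorial):

#include <cstdint>

// One VESLV lane: the count from the second source is taken modulo 32.
uint32_t VeslvLane32(uint32_t value, uint32_t count) {
  return value << (count % 32);
}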
EVALUATE(VTM) {
DCHECK_OPCODE(VTM);
DECODE_VRR_A_INSTRUCTION(r1, r2, m5, m4, m3);
......@@ -3688,6 +3918,63 @@ EVALUATE(VFD) {
return length;
}
#define VECTOR_FP_MULTIPLY_QFMS_OPERATION(type, op, sign, first_lane_only) \
for (size_t i = 0, j = 0; j < kSimd128Size; i++, j += sizeof(type)) { \
type src0 = get_simd_register_by_lane<type>(r2, i); \
type src1 = get_simd_register_by_lane<type>(r3, i); \
type src2 = get_simd_register_by_lane<type>(r4, i); \
type result = sign * (src0 * src1 op src2); \
if (isinf(src0)) result = src0; \
if (isinf(src1)) result = src1; \
if (isinf(src2)) result = src2; \
set_simd_register_by_lane<type>(r1, i, result); \
if (first_lane_only) break; \
}
#define VECTOR_FP_MULTIPLY_QFMS(op, sign) \
switch (m6) { \
case 2: \
DCHECK(CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_1)); \
if (m5 == 8) { \
VECTOR_FP_MULTIPLY_QFMS_OPERATION(float, op, sign, true) \
} else { \
DCHECK_EQ(m5, 0); \
VECTOR_FP_MULTIPLY_QFMS_OPERATION(float, op, sign, false) \
} \
break; \
case 3: \
if (m5 == 8) { \
VECTOR_FP_MULTIPLY_QFMS_OPERATION(double, op, sign, true) \
} else { \
DCHECK_EQ(m5, 0); \
VECTOR_FP_MULTIPLY_QFMS_OPERATION(double, op, sign, false) \
} \
break; \
default: \
UNREACHABLE(); \
break; \
}
EVALUATE(VFMA) {
DCHECK_OPCODE(VFMA);
DECODE_VRR_E_INSTRUCTION(r1, r2, r3, r4, m6, m5);
USE(m5);
USE(m6);
VECTOR_FP_MULTIPLY_QFMS(+, 1)
return length;
}
EVALUATE(VFNMS) {
DCHECK_OPCODE(VFNMS);
DECODE_VRR_E_INSTRUCTION(r1, r2, r3, r4, m6, m5);
USE(m5);
USE(m6);
VECTOR_FP_MULTIPLY_QFMS(-, -1)
return length;
}
#undef VECTOR_FP_MULTIPLY_QFMS
#undef VECTOR_FP_MULTIPLY_QFMS_OPERATION
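Per lane, the macro above evaluates sign * (a * b op c), so VFMA yields a*b + c and VFNMS yields -(a*b - c), with infinities passed through from any operand. A scalar sketch with hypothetical names (editorial):

// VFMA lane: fused multiply-add; VFNMS lane: negated multiply-subtract.
double VfmaLane(double a, double b, double c) { return a * b + c; }
double VfnmsLane(double a, double b, double c) { return -(a * b - c); }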
template <class T, class Operation>
void VectorFPMaxMin(void* dst, void* src1, void* src2, Operation op) {
T* dst_ptr = reinterpret_cast<T*>(dst);
......@@ -3707,8 +3994,13 @@ void VectorFPMaxMin(void* dst, void* src1, void* src2, Operation op) {
#define VECTOR_FP_MAX_MIN_FOR_TYPE(type, op) \
VectorFPMaxMin<type>(&get_simd_register(r1), &get_simd_register(r2), \
&get_simd_register(r3), \
[](type a, type b) { return (a op b) ? a : b; });
&get_simd_register(r3), [](type a, type b) { \
if (signbit(b) op signbit(a)) \
return a; \
else if (signbit(b) != signbit(a)) \
return b; \
return (a op b) ? a : b; \
});
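The signbit checks added to the comparison lambda let max/min order signed zeros: when the operands differ only in the sign of zero, a plain a > b comparison cannot separate them. A standalone sketch of the max direction (editorial):

#include <cmath>

// If the sign bits differ, the non-negative operand wins for max; otherwise
// fall back to the ordinary comparison, so Max(+0.0, -0.0) is +0.0.
double MaxWithSignedZero(double a, double b) {
  if (std::signbit(b) > std::signbit(a)) return a;
  if (std::signbit(b) != std::signbit(a)) return b;
  return (a > b) ? a : b;
}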
#define VECTOR_FP_MAX_MIN(op) \
switch (m4) { \
......@@ -7090,14 +7382,19 @@ EVALUATE(CFEBRA) {
break;
}
case ROUND_TOWARD_0: {
// check for overflow, cast r2_fval to 64bit integer
// check for overflow, cast r2_fval to double
// then check value within the range of INT_MIN and INT_MAX
// and set condition code accordingly
int64_t temp = static_cast<int64_t>(r2_fval);
if (temp < INT_MIN || temp > INT_MAX) {
double temp = static_cast<double>(r2_fval);
if (temp < INT_MIN) {
r1_val = kMinInt;
condition_reg_ = CC_OF;
} else if (temp > INT_MAX) {
r1_val = kMaxInt;
condition_reg_ = CC_OF;
} else {
r1_val = static_cast<int32_t>(r2_fval);
}
r1_val = static_cast<int32_t>(r2_fval);
break;
}
case ROUND_TOWARD_PLUS_INFINITE: {
......@@ -7217,8 +7514,11 @@ EVALUATE(CLFEBR) {
DECODE_RRE_INSTRUCTION(r1, r2);
float r2_val = get_float32_from_d_register(r2);
uint32_t r1_val = static_cast<uint32_t>(r2_val);
set_low_register(r1, r1_val);
SetS390ConvertConditionCode<double>(r2_val, r1_val, UINT32_MAX);
double temp = static_cast<double>(r2_val);
if (temp < 0) r1_val = 0;
if (temp > kMaxUInt32) r1_val = kMaxUInt32;
set_low_register(r1, r1_val);
return length;
}
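Both conversion fixes above clamp out-of-range inputs instead of relying on the raw cast: CFEBRA (round toward zero) saturates to kMinInt/kMaxInt and sets the overflow condition code, and CLFEBR clamps to the unsigned 32-bit range. A simplified sketch of the unsigned direction (editorial; mapping NaN to zero here is an assumption, not taken from the patch):

#include <cstdint>
#include <limits>

// Illustrative only: clamp to [0, UINT32_MAX] before converting.
uint32_t SaturatingFloatToUint32(float v) {
  const double d = static_cast<double>(v);
  if (!(d > 0.0)) return 0;  // negatives, zero and NaN map to 0 in this sketch
  if (d >= static_cast<double>(std::numeric_limits<uint32_t>::max()))
    return std::numeric_limits<uint32_t>::max();
  return static_cast<uint32_t>(d);
}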
......@@ -10900,6 +11200,7 @@ EVALUATE(CXZT) {
#undef DECODE_VRR_B_INSTRUCTION
#undef DECODE_VRR_C_INSTRUCTION
#undef DECODE_VRR_E_INSTRUCTION
#undef DECODE_VRR_F_INSTRUCTION
#undef DECODE_VRX_INSTRUCTION
#undef DECODE_VRS_INSTRUCTION
#undef DECODE_VRI_A_INSTRUCTION
......
......@@ -503,6 +503,7 @@ class Simulator : public SimulatorBase {
S390_VRR_A_OPCODE_LIST(EVALUATE_VR_INSTRUCTIONS)
S390_VRR_C_OPCODE_LIST(EVALUATE_VR_INSTRUCTIONS)
S390_VRR_E_OPCODE_LIST(EVALUATE_VR_INSTRUCTIONS)
S390_VRR_F_OPCODE_LIST(EVALUATE_VR_INSTRUCTIONS)
S390_VRX_OPCODE_LIST(EVALUATE_VR_INSTRUCTIONS)
S390_VRS_A_OPCODE_LIST(EVALUATE_VR_INSTRUCTIONS)
S390_VRS_B_OPCODE_LIST(EVALUATE_VR_INSTRUCTIONS)
......