Commit efd92a16, authored by jyan, committed by Commit bot

s390x: implement vector support on s390

Add vfa/vfs/vfm/vfd support on s390

R=joransiu@ca.ibm.com, bjaideep@ca.ibm.com, danno@chromium.org, jkummerow@chromium.org, jochen@chromium.org
BUG=

Review-Url: https://codereview.chromium.org/2582683002
Cr-Commit-Position: refs/heads/master@{#41741}
parent 1c5e1504
......@@ -1969,20 +1969,38 @@ void LCodeGen::DoArithmeticD(LArithmeticD* instr) {
DoubleRegister left = ToDoubleRegister(instr->left());
DoubleRegister right = ToDoubleRegister(instr->right());
DoubleRegister result = ToDoubleRegister(instr->result());
// All operations except MOD are computed in-place.
DCHECK(instr->op() == Token::MOD || left.is(result));
switch (instr->op()) {
case Token::ADD:
__ adbr(result, right);
if (CpuFeatures::IsSupported(VECTOR_FACILITY)) {
__ vfa(result, left, right);
} else {
DCHECK(result.is(left));
__ adbr(result, right);
}
break;
case Token::SUB:
__ sdbr(result, right);
if (CpuFeatures::IsSupported(VECTOR_FACILITY)) {
__ vfs(result, left, right);
} else {
DCHECK(result.is(left));
__ sdbr(result, right);
}
break;
case Token::MUL:
__ mdbr(result, right);
if (CpuFeatures::IsSupported(VECTOR_FACILITY)) {
__ vfm(result, left, right);
} else {
DCHECK(result.is(left));
__ mdbr(result, right);
}
break;
case Token::DIV:
__ ddbr(result, right);
if (CpuFeatures::IsSupported(VECTOR_FACILITY)) {
__ vfd(result, left, right);
} else {
DCHECK(result.is(left));
__ ddbr(result, right);
}
break;
case Token::MOD: {
__ PrepareCallCFunction(0, 2, scratch0());
......
......@@ -619,7 +619,9 @@ LInstruction* LChunkBuilder::DoArithmeticD(Token::Value op,
LOperand* left = UseRegisterAtStart(instr->BetterLeftOperand());
LOperand* right = UseRegisterAtStart(instr->BetterRightOperand());
LArithmeticD* result = new (zone()) LArithmeticD(op, left, right);
return DefineSameAsFirst(result);
return CpuFeatures::IsSupported(VECTOR_FACILITY)
? DefineAsRegister(result)
: DefineSameAsFirst(result);
}
}
......
......@@ -795,6 +795,7 @@ enum CpuFeature {
DISTINCT_OPS,
GENERAL_INSTR_EXT,
FLOATING_POINT_EXT,
VECTOR_FACILITY,
NUMBER_OF_CPU_FEATURES,
......
......@@ -138,30 +138,34 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
// The facilities we are checking for are:
// Bit 45 - Distinct Operands for instructions like ARK, SRK, etc.
// As such, we require only 1 double word
int64_t facilities[1];
facilities[0] = 0;
int64_t facilities[3] = {0L};
// LHI sets up GPR0
// STFLE is specified as .insn, as opcode is not recognized.
// We register the instructions kill r0 (LHI) and the CC (STFLE).
asm volatile(
"lhi 0,0\n"
"lhi 0,2\n"
".insn s,0xb2b00000,%0\n"
: "=Q"(facilities)
:
: "cc", "r0");
uint64_t one = static_cast<uint64_t>(1);
// Test for Distinct Operands Facility - Bit 45
if (facilities[0] & (1lu << (63 - 45))) {
if (facilities[0] & (one << (63 - 45))) {
supported_ |= (1u << DISTINCT_OPS);
}
// Test for General Instruction Extension Facility - Bit 34
if (facilities[0] & (1lu << (63 - 34))) {
if (facilities[0] & (one << (63 - 34))) {
supported_ |= (1u << GENERAL_INSTR_EXT);
}
// Test for Floating Point Extension Facility - Bit 37
if (facilities[0] & (1lu << (63 - 37))) {
if (facilities[0] & (one << (63 - 37))) {
supported_ |= (1u << FLOATING_POINT_EXT);
}
// Test for Vector Facility - Bit 129
if (facilities[2] & (one << (63 - (129 - 128)))) {
supported_ |= (1u << VECTOR_FACILITY);
}
}
#else
// All distinct ops instructions can be simulated
......@@ -171,6 +175,7 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
supported_ |= (1u << FLOATING_POINT_EXT);
USE(performSTFLE); // To avoid assert
supported_ |= (1u << VECTOR_FACILITY);
#endif
supported_ |= (1u << FPU);
}
......@@ -192,6 +197,7 @@ void CpuFeatures::PrintFeatures() {
printf("FPU_EXT=%d\n", CpuFeatures::IsSupported(FLOATING_POINT_EXT));
printf("GENERAL_INSTR=%d\n", CpuFeatures::IsSupported(GENERAL_INSTR_EXT));
printf("DISTINCT_OPS=%d\n", CpuFeatures::IsSupported(DISTINCT_OPS));
printf("VECTOR_FACILITY=%d\n", CpuFeatures::IsSupported(VECTOR_FACILITY));
}
Register ToRegister(int num) {
......
......@@ -758,6 +758,55 @@ class Assembler : public AssemblerBase {
void name(Register r3, Register b1, Disp d1, Register b2, Disp d2); \
void name(Register r3, const MemOperand& opnd1, const MemOperand& opnd2)
// Generates one encoder per VRR_A_OPCODE_LIST entry. Each encoder packs a
// six-byte instruction word from two vector registers and three 4-bit mask
// operands, then hands it to emit6bytes(). The high opcode byte lands at B32
// scale, the low opcode byte occupies the lowest eight bits.
#define DECLARE_VRR_A_INSTRUCTIONS(name, opcode_name, opcode_value)           \
  void name(DoubleRegister v1, DoubleRegister v2, Condition m5, Condition m4, \
            Condition m3) {                                                   \
    const uint64_t opcode_hi =                                                \
        (static_cast<uint64_t>(opcode_value & 0xFF00)) * B32;                 \
    const uint64_t opcode_lo = static_cast<uint64_t>(opcode_value & 0x00FF);  \
    const uint64_t registers = (static_cast<uint64_t>(v1.code())) * B36 |     \
                               (static_cast<uint64_t>(v2.code())) * B32;      \
    const uint64_t masks = (static_cast<uint64_t>(m5 & 0xF)) * B20 |          \
                           (static_cast<uint64_t>(m4 & 0xF)) * B16 |          \
                           (static_cast<uint64_t>(m3 & 0xF)) * B12;           \
    emit6bytes(opcode_hi | registers | masks | opcode_lo);                    \
  }
VRR_A_OPCODE_LIST(DECLARE_VRR_A_INSTRUCTIONS)
#undef DECLARE_VRR_A_INSTRUCTIONS
// Generates one encoder per VRR_C_OPCODE_LIST entry. Each encoder packs a
// six-byte instruction word from three vector registers (v1 is the result
// operand of the generated arithmetic instructions) and three 4-bit mask
// operands, then hands it to emit6bytes().
#define DECLARE_VRR_C_INSTRUCTIONS(name, opcode_name, opcode_value)          \
  void name(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3,         \
            Condition m6, Condition m5, Condition m4) {                      \
    const uint64_t opcode_hi =                                               \
        (static_cast<uint64_t>(opcode_value & 0xFF00)) * B32;                \
    const uint64_t opcode_lo = static_cast<uint64_t>(opcode_value & 0x00FF); \
    const uint64_t registers = (static_cast<uint64_t>(v1.code())) * B36 |    \
                               (static_cast<uint64_t>(v2.code())) * B32 |    \
                               (static_cast<uint64_t>(v3.code())) * B28;     \
    const uint64_t masks = (static_cast<uint64_t>(m6 & 0xF)) * B20 |         \
                           (static_cast<uint64_t>(m5 & 0xF)) * B16 |         \
                           (static_cast<uint64_t>(m4 & 0xF)) * B12;          \
    emit6bytes(opcode_hi | registers | masks | opcode_lo);                   \
  }
VRR_C_OPCODE_LIST(DECLARE_VRR_C_INSTRUCTIONS)
#undef DECLARE_VRR_C_INSTRUCTIONS
// Single Element format
// VECTOR FP ADD on one double: v1 = v2 + v3. Forwards to the six-operand
// encoder with fixed mask operands.
void vfa(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) {
  const Condition m6 = static_cast<Condition>(0);
  // NOTE(review): 8 appears to be the single-element control — confirm.
  const Condition m5 = static_cast<Condition>(8);
  // NOTE(review): 3 presumably selects the 64-bit (long) format — confirm.
  const Condition m4 = static_cast<Condition>(3);
  vfa(v1, v2, v3, m6, m5, m4);
}
// VECTOR FP SUBTRACT on one double: v1 = v2 - v3. Forwards to the
// six-operand encoder with the same fixed mask operands (0, 8, 3) as the
// other scalar wrappers.
void vfs(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) {
  const Condition m6 = static_cast<Condition>(0);
  const Condition m5 = static_cast<Condition>(8);
  const Condition m4 = static_cast<Condition>(3);
  vfs(v1, v2, v3, m6, m5, m4);
}
// VECTOR FP MULTIPLY on one double: v1 = v2 * v3. Forwards to the
// six-operand encoder with the same fixed mask operands (0, 8, 3) as the
// other scalar wrappers.
void vfm(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) {
  const Condition m6 = static_cast<Condition>(0);
  const Condition m5 = static_cast<Condition>(8);
  const Condition m4 = static_cast<Condition>(3);
  vfm(v1, v2, v3, m6, m5, m4);
}
// VECTOR FP DIVIDE on one double: v1 = v2 / v3. Forwards to the
// six-operand encoder with the same fixed mask operands (0, 8, 3) as the
// other scalar wrappers.
void vfd(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) {
  const Condition m6 = static_cast<Condition>(0);
  const Condition m5 = static_cast<Condition>(8);
  const Condition m4 = static_cast<Condition>(3);
  vfd(v1, v2, v3, m6, m5, m4);
}
// S390 instruction sets
RX_FORM(bc);
RR_FORM(bctr);
......
......@@ -157,9 +157,33 @@ typedef uint16_t TwoByteInstr;
typedef uint32_t FourByteInstr;
typedef uint64_t SixByteInstr;
// X-macro table of the VRR-A vector instructions, sorted by opcode value.
// Each row is V(assembler mnemonic, Opcode enumerator, opcode value).
#define VRR_A_OPCODE_LIST(V)                                          \
  V(vclgd, VCLGD, 0xE7C0) /* VECTOR FP CONVERT TO LOGICAL 64-BIT */   \
  V(vcdlg, VCDLG, 0xE7C1) /* VECTOR FP CONVERT FROM LOGICAL 64-BIT */ \
  V(vcgd, VCGD, 0xE7C2)   /* VECTOR FP CONVERT TO FIXED 64-BIT */     \
  V(vlde, VLDE, 0xE7C4)   /* VECTOR FP LOAD LENGTHENED */             \
  V(vled, VLED, 0xE7C5)   /* VECTOR FP LOAD ROUNDED */                \
  V(vfi, VFI, 0xE7C7)     /* VECTOR LOAD FP INTEGER */                \
  V(wfk, WFK, 0xE7CA)     /* VECTOR FP COMPARE AND SIGNAL SCALAR */   \
  V(wfc, WFC, 0xE7CB)     /* VECTOR FP COMPARE SCALAR */              \
  V(vfpso, VFPSO, 0xE7CC) /* VECTOR FP PERFORM SIGN OPERATION */      \
  V(vfsq, VFSQ, 0xE7CE)   /* VECTOR FP SQUARE ROOT */                 \
  V(vcdg, VCDG, 0xE7D3)   /* VECTOR FP CONVERT FROM FIXED 64-BIT */
// X-macro table of the VRR-C vector arithmetic instructions, sorted by
// opcode value. Each row is V(assembler mnemonic, Opcode enumerator,
// opcode value).
#define VRR_C_OPCODE_LIST(V)                    \
  V(vfs, VFS, 0xE7E2) /* VECTOR FP SUBTRACT */  \
  V(vfa, VFA, 0xE7E3) /* VECTOR FP ADD */       \
  V(vfd, VFD, 0xE7E5) /* VECTOR FP DIVIDE */    \
  V(vfm, VFM, 0xE7E7) /* VECTOR FP MULTIPLY */
// Opcodes as defined in Appendix B-2 table
enum Opcode {
A = 0x5A, // Add (32)
// Turn every row of the vector opcode tables into an enumerator with the
// row's opcode value.
#define DECLARE_OPCODES(mnemonic, enumerator, value) enumerator = value,
VRR_A_OPCODE_LIST(DECLARE_OPCODES)
VRR_C_OPCODE_LIST(DECLARE_OPCODES)
#undef DECLARE_OPCODES
A = 0x5A, // Add (32)
ADB = 0xED1A, // Add (long BFP)
ADBR = 0xB31A, // Add (long BFP)
ADTR = 0xB3D2, // Add (long DFP)
......@@ -1304,6 +1328,23 @@ class Instruction {
DISALLOW_IMPLICIT_CONSTRUCTORS(Instruction);
};
// Defines an accessor `name()` that extracts one field of a six-byte
// instruction. `lo` and `hi` are translated to the positions 47 - lo and
// 47 - hi + 1 before being passed to Bits<SixByteInstr, T>().
// NOTE(review): this reads as [lo, hi) counted from the most significant bit
// of the 48-bit word — confirm against the Bits() helper.
#define DECLARE_FIELD_FOR_SIX_BYTE_INSTR(name, T, lo, hi) \
  inline int name() const {                               \
    const int upper_bit = 47 - (lo);                      \
    const int lower_bit = 47 - (hi) + 1;                  \
    return Bits<SixByteInstr, T>(upper_bit, lower_bit);   \
  }
// VRR Instruction
// Field view of a VRR-C format instruction: three register fields followed
// by three mask fields, each four bits wide per the ranges given below.
class VRR_C_Instruction : Instruction {
 public:
  // VRR-C instructions are six bytes long.
  int size() const { return 6; }

  // Register operand fields.
  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R1Value, int, 8, 12);
  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R2Value, int, 12, 16);
  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R3Value, int, 16, 20);

  // Mask operand fields.
  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M6Value, uint32_t, 24, 28);
  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M5Value, uint32_t, 28, 32);
  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M4Value, uint32_t, 32, 36);
};
// I Instruction -- suspect this will not be used,
// but implement for completeness
class IInstruction : Instruction {
......
......@@ -1055,6 +1055,12 @@ bool Decoder::DecodeSixByte(Instruction* instr) {
case DUMY:
Format(instr, "dumy\t'r1, 'd2 ( 'r2d, 'r3 )");
break;
// Disassemble the vector FP instructions.
// The three-register format belongs to the VRR-C opcodes (vfa/vfs/vfm/vfd),
// whose encoders take v1, v2 and v3. It was previously expanded over the
// VRR-A list, which left the VRR-C instructions undecoded and printed a
// meaningless third register for VRR-A ones.
#define DECODE_VRR_C_INSTRUCTIONS(name, opcode_name, opcode_value) \
  case opcode_name:                                                \
    Format(instr, #name "\t'f1,'f2,'f3");                          \
    break;
VRR_C_OPCODE_LIST(DECODE_VRR_C_INSTRUCTIONS)
#undef DECODE_VRR_C_INSTRUCTIONS
// VRR-A opcodes stay decodable, now with only the two vector registers
// their encoders take.
#define DECODE_VRR_A_INSTRUCTIONS(name, opcode_name, opcode_value) \
  case opcode_name:                                                \
    Format(instr, #name "\t'f1,'f2");                              \
    break;
VRR_A_OPCODE_LIST(DECODE_VRR_A_INSTRUCTIONS)
#undef DECODE_VRR_A_INSTRUCTIONS
case LLILF:
Format(instr, "llilf\t'r1,'i7");
break;
......
......@@ -743,6 +743,11 @@ void Simulator::EvalTableInit() {
EvalTable[i] = &Simulator::Evaluate_Unknown;
}
// Point the dispatch slot of every VRR-C opcode at its Evaluate_<OPCODE>
// member. Each expansion is already a complete statement.
#define CREATE_EVALUATE_TABLE(mnemonic, opcode_id, opcode_bits) \
  EvalTable[opcode_id] = &Simulator::Evaluate_##opcode_id;
VRR_C_OPCODE_LIST(CREATE_EVALUATE_TABLE)
#undef CREATE_EVALUATE_TABLE
EvalTable[DUMY] = &Simulator::Evaluate_DUMY;
EvalTable[BKPT] = &Simulator::Evaluate_BKPT;
EvalTable[SPM] = &Simulator::Evaluate_SPM;
......@@ -6050,6 +6055,15 @@ uintptr_t Simulator::PopAddress() {
int d2 = AS(RXEInstruction)->D2Value(); \
int length = 6;
// Decodes a VRR-C instruction into locals in the caller's scope.
// The six macro arguments are the names of the int locals to declare: three
// register fields (R1, R2, R3) and three mask fields (M6, M5, M4), each read
// through the VRR_C_Instruction accessors. Also declares `length` = 6, the
// instruction size in bytes that the evaluators return.
#define DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4) \
int r1 = AS(VRR_C_Instruction)->R1Value(); \
int r2 = AS(VRR_C_Instruction)->R2Value(); \
int r3 = AS(VRR_C_Instruction)->R3Value(); \
int m6 = AS(VRR_C_Instruction)->M6Value(); \
int m5 = AS(VRR_C_Instruction)->M5Value(); \
int m4 = AS(VRR_C_Instruction)->M4Value(); \
int length = 6;
#define GET_ADDRESS(index_reg, base_reg, offset) \
(((index_reg) == 0) ? 0 : get_register(index_reg)) + \
(((base_reg) == 0) ? 0 : get_register(base_reg)) + offset
......@@ -6059,10 +6073,75 @@ int Simulator::Evaluate_Unknown(Instruction* instr) {
return 0;
}
// Simulates VFA for a single double: d[r1] = d[r2] + d[r3].
// Only the mask combination m5 == 8, m4 == 3 is supported (checked in debug
// builds). Returns the instruction length.
EVALUATE(VFA) {
  DCHECK_OPCODE(VFA);
  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
  // The masks are only inspected by the DCHECKs below.
  USE(m6);
  USE(m5);
  USE(m4);
  DCHECK(m5 == 8);
  DCHECK(m4 == 3);
  const double lhs = get_double_from_d_register(r2);
  const double rhs = get_double_from_d_register(r3);
  const double sum = lhs + rhs;
  set_d_register_from_double(r1, sum);
  return length;
}
// Simulates VFS for a single double: d[r1] = d[r2] - d[r3].
// Only the mask combination m5 == 8, m4 == 3 is supported (checked in debug
// builds). Returns the instruction length.
EVALUATE(VFS) {
  DCHECK_OPCODE(VFS);
  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
  // The masks are only inspected by the DCHECKs below.
  USE(m6);
  USE(m5);
  USE(m4);
  DCHECK(m5 == 8);
  DCHECK(m4 == 3);
  const double minuend = get_double_from_d_register(r2);
  const double subtrahend = get_double_from_d_register(r3);
  const double difference = minuend - subtrahend;
  set_d_register_from_double(r1, difference);
  return length;
}
// Simulates VFM for a single double: d[r1] = d[r2] * d[r3].
// Only the mask combination m5 == 8, m4 == 3 is supported (checked in debug
// builds). Returns the instruction length.
EVALUATE(VFM) {
  DCHECK_OPCODE(VFM);
  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
  // The masks are only inspected by the DCHECKs below.
  USE(m6);
  USE(m5);
  USE(m4);
  DCHECK(m5 == 8);
  DCHECK(m4 == 3);
  const double lhs = get_double_from_d_register(r2);
  const double rhs = get_double_from_d_register(r3);
  const double product = lhs * rhs;
  set_d_register_from_double(r1, product);
  return length;
}
// Simulates VFD for a single double: d[r1] = d[r2] / d[r3].
// Only the mask combination m5 == 8, m4 == 3 is supported (checked in debug
// builds). Returns the instruction length.
EVALUATE(VFD) {
  DCHECK_OPCODE(VFD);
  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
  // The masks are only inspected by the DCHECKs below.
  USE(m6);
  USE(m5);
  USE(m4);
  DCHECK(m5 == 8);
  DCHECK(m4 == 3);
  const double dividend = get_double_from_d_register(r2);
  const double divisor = get_double_from_d_register(r3);
  const double quotient = dividend / divisor;
  set_d_register_from_double(r1, quotient);
  return length;
}
// Simulates DUMY: decodes the operands and changes no simulator state.
// Returns the instruction length.
EVALUATE(DUMY) {
  DCHECK_OPCODE(DUMY);
  DECODE_RXY_A_INSTRUCTION(r1, x2, b2, d2);
  USE(r1);
  USE(x2);
  USE(b2);
  USE(d2);
  // dummy instruction does nothing.
  // A single return, matching the other evaluators; the earlier hard-coded
  // `return 6;` made this statement unreachable.
  // NOTE(review): `length` comes from DECODE_RXY_A_INSTRUCTION, which is not
  // shown here; the sibling decode macros set it to 6 — confirm.
  return length;
}
EVALUATE(CLR) {
......
......@@ -522,6 +522,11 @@ class Simulator {
static void EvalTableInit();
#define EVALUATE(name) int Evaluate_##name(Instruction* instr)
// Declare an Evaluate_<OPCODE> member for every opcode in both vector
// opcode tables.
#define EVALUATE_VRR_INSTRUCTIONS(mnemonic, opcode_id, opcode_bits) \
  EVALUATE(opcode_id);
VRR_A_OPCODE_LIST(EVALUATE_VRR_INSTRUCTIONS)
VRR_C_OPCODE_LIST(EVALUATE_VRR_INSTRUCTIONS)
#undef EVALUATE_VRR_INSTRUCTIONS
EVALUATE(DUMY);
EVALUATE(BKPT);
EVALUATE(SPM);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment