Commit c4d90a74 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement f64x2 comparisons for ia32

Bug: v8:9728
Change-Id: If1572283d464d7d9c33a6ba6a9b9b8cf42efc31a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1834768
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64200}
parent 61815a22
@@ -2192,6 +2192,15 @@ void Assembler::cmpps(XMMRegister dst, Operand src, uint8_t cmp) {
   EMIT(cmp);
 }

+void Assembler::cmppd(XMMRegister dst, Operand src, uint8_t cmp) {
+  EnsureSpace ensure_space(this);
+  EMIT(0x66);
+  EMIT(0x0F);
+  EMIT(0xC2);
+  emit_sse_operand(dst, src);
+  EMIT(cmp);
+}
+
 void Assembler::sqrtsd(XMMRegister dst, Operand src) {
   EnsureSpace ensure_space(this);
   EMIT(0xF2);
@@ -2764,6 +2773,12 @@ void Assembler::vcmpps(XMMRegister dst, XMMRegister src1, Operand src2,
   EMIT(cmp);
 }

+void Assembler::vcmppd(XMMRegister dst, XMMRegister src1, Operand src2,
+                       uint8_t cmp) {
+  vpd(0xC2, dst, src1, src2);
+  EMIT(cmp);
+}
+
 void Assembler::vshufps(XMMRegister dst, XMMRegister src1, Operand src2,
                         byte imm8) {
   DCHECK(is_uint8(imm8));
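For reference, the new SSE2 helper emits the standard 66 0F C2 /r ib encoding, with the trailing immediate byte selecting the comparison predicate; the AVX path reuses the same 0xC2 opcode wrapped in a VEX prefix via the existing vpd() helper. A minimal standalone sketch (not V8 code; the ModRM construction follows the usual x86 rules) of the bytes produced for the register-register form:

#include <cstdint>
#include <cstdio>
#include <vector>

// Sketch of the cmppd byte sequence: 66 0F C2 /r ib.
// dst/src are XMM register numbers (0-7 on ia32).
std::vector<uint8_t> EncodeCmppdRegReg(int dst, int src, uint8_t cmp) {
  // ModRM with mod=11 (register-direct), reg=dst, rm=src.
  uint8_t modrm = static_cast<uint8_t>(0xC0 | (dst << 3) | src);
  return {0x66, 0x0F, 0xC2, modrm, cmp};
}

int main() {
  // cmpeqpd xmm5, xmm1 -> expected bytes: 66 0F C2 E9 00
  for (uint8_t b : EncodeCmppdRegReg(5, 1, 0x0)) std::printf("%02X ", b);
  std::printf("\n");
}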
@@ -874,17 +874,29 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void cmpps(XMMRegister dst, XMMRegister src, uint8_t cmp) {
     cmpps(dst, Operand(src), cmp);
   }
-#define SSE_CMP_P(instr, imm8)                                             \
-  void instr##ps(XMMRegister dst, XMMRegister src) {                       \
-    cmpps(dst, Operand(src), imm8);                                        \
-  }                                                                        \
-  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); }
-  SSE_CMP_P(cmpeq, 0x0)
-  SSE_CMP_P(cmplt, 0x1)
-  SSE_CMP_P(cmple, 0x2)
-  SSE_CMP_P(cmpneq, 0x4)
+  void cmppd(XMMRegister dst, Operand src, uint8_t cmp);
+  void cmppd(XMMRegister dst, XMMRegister src, uint8_t cmp) {
+    cmppd(dst, Operand(src), cmp);
+  }
+
+  // Packed floating-point comparison operations.
+#define PACKED_CMP_LIST(V) \
+  V(cmpeq, 0x0)            \
+  V(cmplt, 0x1)            \
+  V(cmple, 0x2)            \
+  V(cmpneq, 0x4)
+
+#define SSE_CMP_P(instr, imm8)                                             \
+  void instr##ps(XMMRegister dst, XMMRegister src) {                       \
+    cmpps(dst, Operand(src), imm8);                                        \
+  }                                                                        \
+  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); }  \
+  void instr##pd(XMMRegister dst, XMMRegister src) {                       \
+    cmppd(dst, Operand(src), imm8);                                        \
+  }                                                                        \
+  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }
+
+  PACKED_CMP_LIST(SSE_CMP_P)
 #undef SSE_CMP_P

   // SSE2 instructions
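Hand-expanding the cmpeq entry of PACKED_CMP_LIST(SSE_CMP_P) shows what the header now declares; the other three entries differ only in the name and the immediate. A sketch of the expansion inside the Assembler class:

// Expansion sketch of SSE_CMP_P(cmpeq, 0x0):
void cmpeqps(XMMRegister dst, XMMRegister src) { cmpps(dst, Operand(src), 0x0); }
void cmpeqps(XMMRegister dst, Operand src) { cmpps(dst, src, 0x0); }
void cmpeqpd(XMMRegister dst, XMMRegister src) { cmppd(dst, Operand(src), 0x0); }
void cmpeqpd(XMMRegister dst, Operand src) { cmppd(dst, src, 0x0); }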
@@ -1529,24 +1541,30 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   PACKED_OP_LIST(AVX_PACKED_OP_DECLARE)
 #undef AVX_PACKED_OP_DECLARE
 #undef PACKED_OP_LIST

   void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
   void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

   void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
-#define AVX_CMP_P(instr, imm8)                                              \
-  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {     \
-    vcmpps(dst, src1, Operand(src2), imm8);                                 \
-  }                                                                         \
-  void instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {         \
-    vcmpps(dst, src1, src2, imm8);                                          \
-  }
-  AVX_CMP_P(vcmpeq, 0x0)
-  AVX_CMP_P(vcmplt, 0x1)
-  AVX_CMP_P(vcmple, 0x2)
-  AVX_CMP_P(vcmpneq, 0x4)
+  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
+#define AVX_CMP_P(instr, imm8)                                               \
+  void v##instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {   \
+    vcmpps(dst, src1, Operand(src2), imm8);                                  \
+  }                                                                          \
+  void v##instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {       \
+    vcmpps(dst, src1, src2, imm8);                                           \
+  }                                                                          \
+  void v##instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {   \
+    vcmppd(dst, src1, Operand(src2), imm8);                                  \
+  }                                                                          \
+  void v##instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) {       \
+    vcmppd(dst, src1, src2, imm8);                                           \
+  }
+
+  PACKED_CMP_LIST(AVX_CMP_P)
 #undef AVX_CMP_P
+#undef PACKED_CMP_LIST

   // Other SSE and AVX instructions
 #define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
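Note the rename: the list entries no longer carry the v prefix themselves; AVX_CMP_P now pastes v##instr, so the same PACKED_CMP_LIST drives both the SSE and the AVX declarations. A sketch of what AVX_CMP_P(cmpeq, 0x0) expands to inside the Assembler class:

// Expansion sketch of AVX_CMP_P(cmpeq, 0x0):
void vcmpeqps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vcmpps(dst, src1, Operand(src2), 0x0);
}
void vcmpeqps(XMMRegister dst, XMMRegister src1, Operand src2) {
  vcmpps(dst, src1, src2, 0x0);
}
void vcmpeqpd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vcmppd(dst, src1, Operand(src2), 0x0);
}
void vcmpeqpd(XMMRegister dst, XMMRegister src1, Operand src2) {
  vcmppd(dst, src1, src2, 0x0);
}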
@@ -318,6 +318,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_PACKED_OP3(Subpd, subpd)
   AVX_PACKED_OP3(Mulpd, mulpd)
   AVX_PACKED_OP3(Divpd, divpd)
+  AVX_PACKED_OP3(Cmpeqpd, cmpeqpd)
+  AVX_PACKED_OP3(Cmpneqpd, cmpneqpd)
+  AVX_PACKED_OP3(Cmpltpd, cmpltpd)
+  AVX_PACKED_OP3(Cmplepd, cmplepd)
 #undef AVX_PACKED_OP3
 #undef AVX_PACKED_OP3_WITH_TYPE
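AVX_PACKED_OP3 is the existing dispatch helper in this header. Roughly (a paraphrase of the surrounding macro's pattern, not verbatim V8 code), each new macro-instruction picks the non-destructive three-operand AVX form when available and falls back to the destructive SSE form otherwise:

// Rough shape of what AVX_PACKED_OP3(Cmpeqpd, cmpeqpd) generates (sketch):
void Cmpeqpd(XMMRegister dst, XMMRegister src1, Operand src2) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vcmpeqpd(dst, src1, src2);  // three-operand AVX form
  } else {
    DCHECK_EQ(dst, src1);  // SSE form overwrites its first operand
    cmpeqpd(dst, src2);
  }
}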
@@ -1924,6 +1924,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                i.InputOperand(1));
       break;
     }
+    case kIA32F64x2Eq: {
+      __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Ne: {
+      __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Lt: {
+      __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Le: {
+      __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
     case kSSEF32x4Splat: {
       DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       XMMRegister dst = i.OutputSimd128Register();
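cmppd/vcmppd write an all-ones 64-bit mask into each lane where the predicate holds and zero elsewhere, which is exactly the -1/0 lane semantics wasm's f64x2 comparisons require, so each opcode lowers to a single macro-instruction. A hypothetical scalar reference model of the four predicates used here (not part of the commit); note the NaN behavior: eq/lt/le are ordered and fail on NaN, while neq is unordered and succeeds:

#include <cstdint>

// Hypothetical per-lane reference model of cmppd's predicates.
uint64_t CmppdLane(double a, double b, uint8_t pred) {
  bool r = false;
  switch (pred) {
    case 0x0: r = (a == b); break;   // eq: false if either input is NaN
    case 0x1: r = (a < b); break;    // lt: false if either input is NaN
    case 0x2: r = (a <= b); break;   // le: false if either input is NaN
    case 0x4: r = !(a == b); break;  // neq: true if either input is NaN
  }
  return r ? ~uint64_t{0} : uint64_t{0};  // all-ones mask or zero
}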
@@ -127,6 +127,10 @@ namespace compiler {
   V(IA32F64x2Sub)          \
   V(IA32F64x2Mul)          \
   V(IA32F64x2Div)          \
+  V(IA32F64x2Eq)           \
+  V(IA32F64x2Ne)           \
+  V(IA32F64x2Lt)           \
+  V(IA32F64x2Le)           \
   V(SSEF32x4Splat)         \
   V(AVXF32x4Splat)         \
   V(SSEF32x4ExtractLane)   \
@@ -108,6 +108,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32F64x2Sub:
     case kIA32F64x2Mul:
     case kIA32F64x2Div:
+    case kIA32F64x2Eq:
+    case kIA32F64x2Ne:
+    case kIA32F64x2Lt:
+    case kIA32F64x2Le:
     case kSSEF32x4Splat:
     case kAVXF32x4Splat:
     case kSSEF32x4ExtractLane:
@@ -864,7 +864,11 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
   V(F64x2Add, kIA32F64x2Add, kIA32F64x2Add)     \
   V(F64x2Sub, kIA32F64x2Sub, kIA32F64x2Sub)     \
   V(F64x2Mul, kIA32F64x2Mul, kIA32F64x2Mul)     \
-  V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div)
+  V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div)     \
+  V(F64x2Eq, kIA32F64x2Eq, kIA32F64x2Eq)        \
+  V(F64x2Ne, kIA32F64x2Ne, kIA32F64x2Ne)        \
+  V(F64x2Lt, kIA32F64x2Lt, kIA32F64x2Lt)        \
+  V(F64x2Le, kIA32F64x2Le, kIA32F64x2Le)

 #define FLOAT_UNOP_LIST(V)                      \
   V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
@@ -2632,13 +2632,13 @@ void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_IA32
-void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Le(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_IA32
+void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
@@ -1224,6 +1224,15 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
         current += PrintRightOperand(current);
         AppendToBuffer(",%s", NameOfXMMRegister(regop));
         break;
+      case 0xC2: {
+        const char* const pseudo_op[] = {"eq", "lt", "le", "unord", "neq"};
+        AppendToBuffer("vcmppd %s,%s,", NameOfXMMRegister(regop),
+                       NameOfXMMRegister(vvvv));
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(", (%s)", pseudo_op[*current]);
+        current++;
+        break;
+      }
       case 0xC4:
         AppendToBuffer("vpinsrw %s,%s,", NameOfXMMRegister(regop),
                        NameOfXMMRegister(vvvv));
@@ -2287,6 +2296,15 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
         AppendToBuffer("movd ");
         data += PrintRightOperand(data);
         AppendToBuffer(",%s", NameOfXMMRegister(regop));
+      } else if (*data == 0xC2) {
+        data++;
+        int mod, regop, rm;
+        get_modrm(*data, &mod, &regop, &rm);
+        const char* const pseudo_op[] = {"eq", "lt", "le", "unord", "neq"};
+        AppendToBuffer("cmppd %s, ", NameOfXMMRegister(regop));
+        data += PrintRightXMMOperand(data);
+        AppendToBuffer(", (%s)", pseudo_op[*data]);
+        data++;
       } else if (*data == 0xC4) {
         data++;
         int mod, regop, rm;
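Reading the format strings above, the register forms should disassemble roughly as follows (derived from the code, not captured from real output; the trailing immediate indexes pseudo_op, so 0x3 would print as unord even though the assembler helpers never emit it):

// cmpeqpd xmm5, xmm1         ->  "cmppd xmm5, xmm1, (eq)"
// vcmpeqpd xmm5, xmm4, xmm1  ->  "vcmppd xmm5,xmm4,xmm1, (eq)"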
@@ -512,6 +512,15 @@ TEST(DisasmIa320) {
     __ maxpd(xmm1, xmm0);
     __ maxpd(xmm1, Operand(ebx, ecx, times_4, 10000));

+    __ cmpeqpd(xmm5, xmm1);
+    __ cmpeqpd(xmm5, Operand(ebx, ecx, times_4, 10000));
+    __ cmpltpd(xmm5, xmm1);
+    __ cmpltpd(xmm5, Operand(ebx, ecx, times_4, 10000));
+    __ cmplepd(xmm5, xmm1);
+    __ cmplepd(xmm5, Operand(ebx, ecx, times_4, 10000));
+    __ cmpneqpd(xmm5, xmm1);
+    __ cmpneqpd(xmm5, Operand(ebx, ecx, times_4, 10000));
+
     __ psllw(xmm0, 17);
     __ pslld(xmm0, 17);
     __ psrlw(xmm0, 17);
@@ -702,6 +711,15 @@ TEST(DisasmIa320) {
     __ vmaxpd(xmm0, xmm1, xmm2);
     __ vmaxpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));

+    __ vcmpeqpd(xmm5, xmm4, xmm1);
+    __ vcmpeqpd(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
+    __ vcmpltpd(xmm5, xmm4, xmm1);
+    __ vcmpltpd(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
+    __ vcmplepd(xmm5, xmm4, xmm1);
+    __ vcmplepd(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
+    __ vcmpneqpd(xmm5, xmm4, xmm1);
+    __ vcmpneqpd(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
+
     __ vpsllw(xmm0, xmm7, 21);
     __ vpslld(xmm0, xmm7, 21);
     __ vpsrlw(xmm0, xmm7, 21);
@@ -286,7 +286,7 @@ T Sqrt(T a) {
   return std::sqrt(a);
 }

-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
 // only used for F64x2 tests below
 int64_t Equal(double a, double b) { return a == b ? -1 : 0; }
@@ -300,6 +300,7 @@ int64_t Less(double a, double b) { return a < b ? -1 : 0; }
 int64_t LessEqual(double a, double b) { return a <= b ? -1 : 0; }

+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 // Only used for qfma and qfms tests below.
 // FMOperation holds the params (a, b, c) for a Multiply-Add or
@@ -383,6 +384,7 @@ bool ExpectFused(ExecutionTier tier) {
 #endif
 }
 #endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32

 }  // namespace
@@ -1363,7 +1365,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
   RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
 }

-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                            WasmOpcode opcode, DoubleCompareOp expected_op) {
   WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
@@ -1418,6 +1419,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Le) {
   RunF64x2CompareOpTest(execution_tier, lower_simd, kExprF64x2Le, LessEqual);
 }

+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 WASM_SIMD_TEST_NO_LOWERING(F64x2Min) {
   RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Min, JSMin);
 }