Commit c4d90a74 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement f64x2 comparisons for ia32

Bug: v8:9728
Change-Id: If1572283d464d7d9c33a6ba6a9b9b8cf42efc31a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1834768
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64200}
parent 61815a22
@@ -2192,6 +2192,15 @@ void Assembler::cmpps(XMMRegister dst, Operand src, uint8_t cmp) {
   EMIT(cmp);
 }
 
+void Assembler::cmppd(XMMRegister dst, Operand src, uint8_t cmp) {
+  EnsureSpace ensure_space(this);
+  EMIT(0x66);
+  EMIT(0x0F);
+  EMIT(0xC2);
+  emit_sse_operand(dst, src);
+  EMIT(cmp);
+}
+
 void Assembler::sqrtsd(XMMRegister dst, Operand src) {
   EnsureSpace ensure_space(this);
   EMIT(0xF2);
@@ -2764,6 +2773,12 @@ void Assembler::vcmpps(XMMRegister dst, XMMRegister src1, Operand src2,
   EMIT(cmp);
 }
 
+void Assembler::vcmppd(XMMRegister dst, XMMRegister src1, Operand src2,
+                       uint8_t cmp) {
+  vpd(0xC2, dst, src1, src2);
+  EMIT(cmp);
+}
+
 void Assembler::vshufps(XMMRegister dst, XMMRegister src1, Operand src2,
                         byte imm8) {
   DCHECK(is_uint8(imm8));
...
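For reference, the new cmppd emits the SSE2 packed-double compare encoding 66 0F C2 /r ib, where the trailing immediate picks the predicate (0x0 = eq, 0x1 = lt, 0x2 = le, 0x4 = neq, matching PACKED_CMP_LIST and the disassembler's pseudo_op table below). A minimal stand-alone sketch of the register-register form this produces; illustrative only, not V8 code:

#include <cstdint>
#include <cstdio>
#include <vector>

// Bytes Assembler::cmppd emits for a register-direct operand:
// 66 0F C2 /r ib, with imm8 selecting the comparison predicate.
std::vector<uint8_t> EncodeCmppd(int dst, int src, uint8_t predicate) {
  uint8_t modrm = 0xC0 | (dst << 3) | src;  // mod=11: register direct
  return {0x66, 0x0F, 0xC2, modrm, predicate};
}

int main() {
  for (uint8_t b : EncodeCmppd(5, 1, 0x0))  // cmpeqpd xmm5, xmm1
    printf("%02x ", b);                     // prints: 66 0f c2 e9 00
  printf("\n");
}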
@@ -874,17 +874,29 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void cmpps(XMMRegister dst, XMMRegister src, uint8_t cmp) {
     cmpps(dst, Operand(src), cmp);
   }
+  void cmppd(XMMRegister dst, Operand src, uint8_t cmp);
+  void cmppd(XMMRegister dst, XMMRegister src, uint8_t cmp) {
+    cmppd(dst, Operand(src), cmp);
+  }
+
+// Packed floating-point comparison operations.
+#define PACKED_CMP_LIST(V) \
+  V(cmpeq, 0x0)            \
+  V(cmplt, 0x1)            \
+  V(cmple, 0x2)            \
+  V(cmpneq, 0x4)
+
 #define SSE_CMP_P(instr, imm8)                                            \
   void instr##ps(XMMRegister dst, XMMRegister src) {                      \
     cmpps(dst, Operand(src), imm8);                                       \
   }                                                                       \
-  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); }
+  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); } \
+  void instr##pd(XMMRegister dst, XMMRegister src) {                      \
+    cmppd(dst, Operand(src), imm8);                                       \
+  }                                                                       \
+  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }
 
-  SSE_CMP_P(cmpeq, 0x0)
-  SSE_CMP_P(cmplt, 0x1)
-  SSE_CMP_P(cmple, 0x2)
-  SSE_CMP_P(cmpneq, 0x4)
+  PACKED_CMP_LIST(SSE_CMP_P)
 #undef SSE_CMP_P
 
   // SSE2 instructions
@@ -1529,24 +1541,30 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   PACKED_OP_LIST(AVX_PACKED_OP_DECLARE)
 #undef AVX_PACKED_OP_DECLARE
 #undef PACKED_OP_LIST
 
   void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
   void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
 
   void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
+  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
 #define AVX_CMP_P(instr, imm8)                                              \
-  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {     \
+  void v##instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
     vcmpps(dst, src1, Operand(src2), imm8);                                 \
   }                                                                         \
-  void instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {         \
+  void v##instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {      \
     vcmpps(dst, src1, src2, imm8);                                          \
-  }
+  }                                                                         \
+  void v##instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
+    vcmppd(dst, src1, Operand(src2), imm8);                                 \
+  }                                                                         \
+  void v##instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) {      \
+    vcmppd(dst, src1, src2, imm8);                                          \
+  }
 
-  AVX_CMP_P(vcmpeq, 0x0)
-  AVX_CMP_P(vcmplt, 0x1)
-  AVX_CMP_P(vcmple, 0x2)
-  AVX_CMP_P(vcmpneq, 0x4)
+  PACKED_CMP_LIST(AVX_CMP_P)
 #undef AVX_CMP_P
+#undef PACKED_CMP_LIST
 
   // Other SSE and AVX instructions
 #define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
...
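The PACKED_CMP_LIST/SSE_CMP_P pairing is an X-macro: the list fixes the (name, immediate) pairs once, and each consumer macro (SSE_CMP_P, AVX_CMP_P) stamps out the overloads, so SSE and AVX stay in sync. A stand-alone sketch of the expansion with a stub Assembler; the stubs are illustrative, only the macro shape mirrors the header above:

#include <cstdint>
#include <cstdio>

struct XMMRegister {};
struct Operand {
  explicit Operand(XMMRegister) {}
};

struct Assembler {
  void cmpps(XMMRegister, Operand, uint8_t cmp) { printf("cmpps imm=%d\n", cmp); }
  void cmppd(XMMRegister, Operand, uint8_t cmp) { printf("cmppd imm=%d\n", cmp); }

#define PACKED_CMP_LIST(V) \
  V(cmpeq, 0x0)            \
  V(cmplt, 0x1)            \
  V(cmple, 0x2)            \
  V(cmpneq, 0x4)

  // Each list entry stamps out ps and pd overloads for one predicate.
#define SSE_CMP_P(instr, imm8)                                            \
  void instr##ps(XMMRegister dst, XMMRegister src) {                      \
    cmpps(dst, Operand(src), imm8);                                       \
  }                                                                       \
  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, XMMRegister src) {                      \
    cmppd(dst, Operand(src), imm8);                                       \
  }                                                                       \
  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }

  PACKED_CMP_LIST(SSE_CMP_P)
#undef SSE_CMP_P
#undef PACKED_CMP_LIST
};

int main() {
  Assembler a;
  XMMRegister x;
  a.cmpeqpd(x, x);   // prints: cmppd imm=0
  a.cmpneqpd(x, x);  // prints: cmppd imm=4
}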
@@ -318,6 +318,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_PACKED_OP3(Subpd, subpd)
   AVX_PACKED_OP3(Mulpd, mulpd)
   AVX_PACKED_OP3(Divpd, divpd)
+  AVX_PACKED_OP3(Cmpeqpd, cmpeqpd)
+  AVX_PACKED_OP3(Cmpneqpd, cmpneqpd)
+  AVX_PACKED_OP3(Cmpltpd, cmpltpd)
+  AVX_PACKED_OP3(Cmplepd, cmplepd)
 #undef AVX_PACKED_OP3
 #undef AVX_PACKED_OP3_WITH_TYPE
...
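Each AVX_PACKED_OP3 entry produces a capitalized macro-assembler wrapper that dispatches on CPU features: the non-destructive three-operand VEX form when AVX is available, otherwise the destructive SSE2 form. A stand-alone model of that dispatch with stub types; the real wrapper is generated by AVX_PACKED_OP3_WITH_TYPE in the V8 header, so names and scaffolding here are assumptions:

#include <cstdio>

struct XMMRegister { int code; };
struct Operand {
  explicit Operand(XMMRegister r) : code(r.code) {}
  int code;
};

bool avx_supported = true;  // stand-in for CpuFeatures::IsSupported(AVX)

void vcmpeqpd(XMMRegister d, XMMRegister s1, Operand s2) {
  printf("vcmpeqpd xmm%d, xmm%d, xmm%d\n", d.code, s1.code, s2.code);
}
void cmpeqpd(XMMRegister d, Operand s2) {
  printf("cmpeqpd xmm%d, xmm%d\n", d.code, s2.code);
}

// Cmpeqpd: pick the non-destructive VEX form under AVX, else the SSE2
// form, which overwrites dst (so the caller must pass dst == src1).
void Cmpeqpd(XMMRegister dst, XMMRegister src1, Operand src2) {
  if (avx_supported) {
    vcmpeqpd(dst, src1, src2);
  } else {
    // V8 guards this fallback with DCHECK_EQ(dst, src1).
    cmpeqpd(dst, src2);
  }
}

int main() {
  XMMRegister x0{0}, x1{1};
  Cmpeqpd(x0, x0, Operand(x1));  // vcmpeqpd xmm0, xmm0, xmm1
  avx_supported = false;
  Cmpeqpd(x0, x0, Operand(x1));  // cmpeqpd xmm0, xmm1
}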
@@ -1924,6 +1924,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                i.InputOperand(1));
       break;
     }
+    case kIA32F64x2Eq: {
+      __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Ne: {
+      __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Lt: {
+      __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Le: {
+      __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
     case kSSEF32x4Splat: {
       DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       XMMRegister dst = i.OutputSimd128Register();
...
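The four cases above produce a per-lane bit mask: each 64-bit lane becomes all ones where the predicate holds and zero where it does not. NaN handling follows the x86 predicates, which is exactly what wasm requires: eq/lt/le are ordered (any NaN input yields false), while neq is unordered (any NaN input yields true, matching f64x2.ne). A scalar model of one lane, for illustration:

#include <cstdint>
#include <cstdio>
#include <limits>

// One 64-bit lane of cmppd for the four predicates used by this commit.
uint64_t CmppdLane(double a, double b, uint8_t predicate) {
  bool r = false;
  switch (predicate) {
    case 0x0: r = (a == b); break;   // eq  (ordered: NaN -> false)
    case 0x1: r = (a < b);  break;   // lt  (ordered)
    case 0x2: r = (a <= b); break;   // le  (ordered)
    case 0x4: r = !(a == b); break;  // neq (unordered: NaN -> true)
  }
  return r ? ~uint64_t{0} : uint64_t{0};
}

int main() {
  double nan = std::numeric_limits<double>::quiet_NaN();
  printf("%016llx\n", (unsigned long long)CmppdLane(1.0, 1.0, 0x0));  // ffff...
  printf("%016llx\n", (unsigned long long)CmppdLane(nan, 1.0, 0x0));  // 0000...
  printf("%016llx\n", (unsigned long long)CmppdLane(nan, 1.0, 0x4));  // ffff...
}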
@@ -127,6 +127,10 @@ namespace compiler {
   V(IA32F64x2Sub)           \
   V(IA32F64x2Mul)           \
   V(IA32F64x2Div)           \
+  V(IA32F64x2Eq)            \
+  V(IA32F64x2Ne)            \
+  V(IA32F64x2Lt)            \
+  V(IA32F64x2Le)            \
   V(SSEF32x4Splat)          \
   V(AVXF32x4Splat)          \
   V(SSEF32x4ExtractLane)    \
...
@@ -108,6 +108,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32F64x2Sub:
     case kIA32F64x2Mul:
     case kIA32F64x2Div:
+    case kIA32F64x2Eq:
+    case kIA32F64x2Ne:
+    case kIA32F64x2Lt:
+    case kIA32F64x2Le:
     case kSSEF32x4Splat:
     case kAVXF32x4Splat:
     case kSSEF32x4ExtractLane:
...
@@ -864,7 +864,11 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
   V(F64x2Add, kIA32F64x2Add, kIA32F64x2Add) \
   V(F64x2Sub, kIA32F64x2Sub, kIA32F64x2Sub) \
   V(F64x2Mul, kIA32F64x2Mul, kIA32F64x2Mul) \
-  V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div)
+  V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div) \
+  V(F64x2Eq, kIA32F64x2Eq, kIA32F64x2Eq)    \
+  V(F64x2Ne, kIA32F64x2Ne, kIA32F64x2Ne)    \
+  V(F64x2Lt, kIA32F64x2Lt, kIA32F64x2Lt)    \
+  V(F64x2Le, kIA32F64x2Le, kIA32F64x2Le)
 
 #define FLOAT_UNOP_LIST(V)                        \
   V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs)   \
...
@@ -2632,13 +2632,13 @@ void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_IA32
-void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Le(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_IA32
+void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
...
@@ -1224,6 +1224,15 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
         current += PrintRightOperand(current);
         AppendToBuffer(",%s", NameOfXMMRegister(regop));
         break;
+      case 0xC2: {
+        const char* const pseudo_op[] = {"eq", "lt", "le", "unord", "neq"};
+        AppendToBuffer("vcmppd %s,%s,", NameOfXMMRegister(regop),
+                       NameOfXMMRegister(vvvv));
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(", (%s)", pseudo_op[*current]);
+        current++;
+        break;
+      }
       case 0xC4:
         AppendToBuffer("vpinsrw %s,%s,", NameOfXMMRegister(regop),
                        NameOfXMMRegister(vvvv));
@@ -2287,6 +2296,15 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
         AppendToBuffer("movd ");
         data += PrintRightOperand(data);
         AppendToBuffer(",%s", NameOfXMMRegister(regop));
+      } else if (*data == 0xC2) {
+        data++;
+        int mod, regop, rm;
+        get_modrm(*data, &mod, &regop, &rm);
+        const char* const pseudo_op[] = {"eq", "lt", "le", "unord", "neq"};
+        AppendToBuffer("cmppd %s, ", NameOfXMMRegister(regop));
+        data += PrintRightXMMOperand(data);
+        AppendToBuffer(", (%s)", pseudo_op[*data]);
+        data++;
      } else if (*data == 0xC4) {
        data++;
        int mod, regop, rm;
...
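The non-VEX path added above decodes the byte shape 66 0F C2 modrm imm8 and prints the predicate name from pseudo_op. A minimal stand-alone decoder for the register-direct case; illustrative only, the real code goes through get_modrm and PrintRightXMMOperand:

#include <cstdint>
#include <cstdio>

void DecodeCmppd(const uint8_t* code) {
  // Expect 66 0F C2 modrm imm8 with mod == 11 (register-direct).
  if (code[0] != 0x66 || code[1] != 0x0F || code[2] != 0xC2) return;
  int regop = (code[3] >> 3) & 7;  // reg field of the ModR/M byte
  int rm = code[3] & 7;            // r/m field
  static const char* const pseudo_op[] = {"eq", "lt", "le", "unord", "neq"};
  printf("cmppd xmm%d,xmm%d, (%s)\n", regop, rm, pseudo_op[code[4]]);
}

int main() {
  const uint8_t bytes[] = {0x66, 0x0F, 0xC2, 0xE9, 0x00};  // cmpeqpd xmm5, xmm1
  DecodeCmppd(bytes);  // prints: cmppd xmm5,xmm1, (eq)
}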
@@ -512,6 +512,15 @@ TEST(DisasmIa320) {
     __ maxpd(xmm1, xmm0);
     __ maxpd(xmm1, Operand(ebx, ecx, times_4, 10000));
 
+    __ cmpeqpd(xmm5, xmm1);
+    __ cmpeqpd(xmm5, Operand(ebx, ecx, times_4, 10000));
+    __ cmpltpd(xmm5, xmm1);
+    __ cmpltpd(xmm5, Operand(ebx, ecx, times_4, 10000));
+    __ cmplepd(xmm5, xmm1);
+    __ cmplepd(xmm5, Operand(ebx, ecx, times_4, 10000));
+    __ cmpneqpd(xmm5, xmm1);
+    __ cmpneqpd(xmm5, Operand(ebx, ecx, times_4, 10000));
+
     __ psllw(xmm0, 17);
     __ pslld(xmm0, 17);
     __ psrlw(xmm0, 17);
@@ -702,6 +711,15 @@ TEST(DisasmIa320) {
     __ vmaxpd(xmm0, xmm1, xmm2);
     __ vmaxpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
 
+    __ vcmpeqpd(xmm5, xmm4, xmm1);
+    __ vcmpeqpd(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
+    __ vcmpltpd(xmm5, xmm4, xmm1);
+    __ vcmpltpd(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
+    __ vcmplepd(xmm5, xmm4, xmm1);
+    __ vcmplepd(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
+    __ vcmpneqpd(xmm5, xmm4, xmm1);
+    __ vcmpneqpd(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
+
     __ vpsllw(xmm0, xmm7, 21);
     __ vpslld(xmm0, xmm7, 21);
     __ vpsrlw(xmm0, xmm7, 21);
...
@@ -286,7 +286,7 @@ T Sqrt(T a) {
   return std::sqrt(a);
 }
 
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
 // only used for F64x2 tests below
 int64_t Equal(double a, double b) { return a == b ? -1 : 0; }
@@ -300,6 +300,7 @@ int64_t Less(double a, double b) { return a < b ? -1 : 0; }
 int64_t LessEqual(double a, double b) { return a <= b ? -1 : 0; }
 
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 // Only used for qfma and qfms tests below.
 // FMOperation holds the params (a, b, c) for a Multiply-Add or
@@ -383,6 +384,7 @@ bool ExpectFused(ExecutionTier tier) {
 #endif
 }
 #endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
 
 }  // namespace
@@ -1363,7 +1365,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
   RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
 }
 
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                            WasmOpcode opcode, DoubleCompareOp expected_op) {
   WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
@@ -1418,6 +1419,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Le) {
   RunF64x2CompareOpTest(execution_tier, lower_simd, kExprF64x2Le, LessEqual);
 }
 
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
 WASM_SIMD_TEST_NO_LOWERING(F64x2Min) {
   RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Min, JSMin);
 }
...
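The compare tests reuse the scalar oracles shown above (Equal, Less, LessEqual, and friends): each returns -1 or 0, and -1 reinterpreted as a 64-bit integer is exactly the all-ones lane mask the hardware compare writes. A stand-alone illustration of that correspondence; the real checking lives in the WasmRunner harness:

#include <cstdint>
#include <cstdio>

// Scalar oracle as in the test file: -1 when the relation holds, else 0.
int64_t Equal(double a, double b) { return a == b ? -1 : 0; }

int main() {
  // -1 as uint64_t is the all-ones mask an f64x2.eq lane holds on a match.
  printf("%016llx\n", (unsigned long long)Equal(1.5, 1.5));  // ffffffffffffffff
  printf("%016llx\n", (unsigned long long)Equal(1.5, 2.5));  // 0000000000000000
}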