Commit 3bc06ed3 authored by Zhi An Ng, committed by Commit Bot

[ia32] Merge f32x4 add sub mul div SSE and AVX opcodes

Drive-by fix IWYU for instruction-scheduler-ia32.cc.

Bug: v8:11217,v8:7490
Change-Id: I7ae4fdaf3c48274e9421e6b31897ad0ea1464876
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2585254
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71750}
parent 84d05c6e
......@@ -1647,6 +1647,17 @@ void TurboAssembler::Psignd(XMMRegister dst, Operand src) {
FATAL("no AVX or SSE3 support");
}
// Emits the packed-single horizontal add used for F32x4AddHoriz.
//
// With AVX available the three-operand vhaddps is emitted, so dst may differ
// from src1. Otherwise the two-operand SSE3 haddps is emitted; that form
// takes dst as its first source as well, hence the DCHECK that dst == src1.
void TurboAssembler::Haddps(XMMRegister dst, XMMRegister src1, Operand src2) {
  if (!CpuFeatures::IsSupported(AVX)) {
    // SSE3 fallback: destructive encoding, dst doubles as the first source.
    CpuFeatureScope sse3_scope(this, SSE3);
    DCHECK_EQ(dst, src1);
    haddps(dst, src2);
    return;
  }
  // AVX path: non-destructive three-operand encoding.
  CpuFeatureScope avx_scope(this, AVX);
  vhaddps(dst, src1, src2);
}
void TurboAssembler::Pcmpeqq(XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
if (CpuFeatures::IsSupported(AVX)) {
......
......@@ -409,7 +409,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Addpd, addpd)
AVX_PACKED_OP3(Subps, subps)
AVX_PACKED_OP3(Subpd, subpd)
AVX_PACKED_OP3(Mulps, mulps)
AVX_PACKED_OP3(Mulpd, mulpd)
AVX_PACKED_OP3(Divps, divps)
AVX_PACKED_OP3(Divpd, divpd)
AVX_PACKED_OP3(Cmpeqpd, cmpeqpd)
AVX_PACKED_OP3(Cmpneqpd, cmpneqpd)
......@@ -525,6 +527,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
#undef AVX_OP3_XO_SSE4
#undef AVX_OP3_WITH_TYPE_SCOPE
void Haddps(XMMRegister dst, XMMRegister src1, Operand src2);
void Pcmpeqq(XMMRegister dst, XMMRegister src1, XMMRegister src2);
// Two-operand convenience overloads of Pshufb: both forward to the
// three-operand Pshufb, passing dst as the destination and as the first
// source. One overload takes the second source as a register, the other as
// an Operand.
void Pshufb(XMMRegister dst, XMMRegister src) { Pshufb(dst, dst, src); }
void Pshufb(XMMRegister dst, Operand src) { Pshufb(dst, dst, src); }
......
......@@ -2437,60 +2437,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kSSEF32x4Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ addps(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXF32x4Add: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEF32x4AddHoriz: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE3);
__ haddps(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXF32x4AddHoriz: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vhaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEF32x4Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ subps(i.OutputSimd128Register(), i.InputOperand(1));
case kIA32F32x4Add: {
__ Addps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kAVXF32x4Sub: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vsubps(i.OutputSimd128Register(), i.InputSimd128Register(0),
case kIA32F32x4AddHoriz: {
__ Haddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEF32x4Mul: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ mulps(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXF32x4Mul: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vmulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
case kIA32F32x4Sub: {
__ Subps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEF32x4Div: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ divps(i.OutputSimd128Register(), i.InputOperand(1));
case kIA32F32x4Mul: {
__ Mulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kAVXF32x4Div: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vdivps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
case kIA32F32x4Div: {
__ Divps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEF32x4Min: {
......
......@@ -166,16 +166,11 @@ namespace compiler {
V(IA32F32x4Sqrt) \
V(IA32F32x4RecipApprox) \
V(IA32F32x4RecipSqrtApprox) \
V(SSEF32x4Add) \
V(AVXF32x4Add) \
V(SSEF32x4AddHoriz) \
V(AVXF32x4AddHoriz) \
V(SSEF32x4Sub) \
V(AVXF32x4Sub) \
V(SSEF32x4Mul) \
V(AVXF32x4Mul) \
V(SSEF32x4Div) \
V(AVXF32x4Div) \
V(IA32F32x4Add) \
V(IA32F32x4AddHoriz) \
V(IA32F32x4Sub) \
V(IA32F32x4Mul) \
V(IA32F32x4Div) \
V(SSEF32x4Min) \
V(AVXF32x4Min) \
V(SSEF32x4Max) \
......
......@@ -2,7 +2,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/base/logging.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/backend/instruction-scheduler.h"
#include "src/compiler/backend/instruction.h"
namespace v8 {
namespace internal {
......@@ -145,16 +148,11 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32F32x4Sqrt:
case kIA32F32x4RecipApprox:
case kIA32F32x4RecipSqrtApprox:
case kSSEF32x4Add:
case kAVXF32x4Add:
case kSSEF32x4AddHoriz:
case kAVXF32x4AddHoriz:
case kSSEF32x4Sub:
case kAVXF32x4Sub:
case kSSEF32x4Mul:
case kAVXF32x4Mul:
case kSSEF32x4Div:
case kAVXF32x4Div:
case kIA32F32x4Add:
case kIA32F32x4AddHoriz:
case kIA32F32x4Sub:
case kIA32F32x4Mul:
case kIA32F32x4Div:
case kSSEF32x4Min:
case kAVXF32x4Min:
case kSSEF32x4Max:
......
......@@ -2173,11 +2173,6 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I8x16)
#define SIMD_BINOP_LIST(V) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(F32x4Min) \
V(F32x4Max) \
V(F32x4Eq) \
......@@ -2239,6 +2234,11 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(S128Xor)
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(I64x2Add) \
V(I64x2Sub) \
V(I64x2Eq) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment