Commit 36ddd075 authored by jing.bao's avatar jing.bao Committed by Commit Bot

Add phaddd/vphaddd, phaddw/vphaddw and vhaddps to ia32

Also Add vhaddps to x64
Fix haddps for SSE3 scope and disassembler on ia32/x64

Change-Id: If511e6428fa1ce034b4281943dfee1405c9d4ffc
Reviewed-on: https://chromium-review.googlesource.com/939265Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#51642}
parent a4b615eb
......@@ -2169,6 +2169,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64F32x4AddHoriz: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE3);
__ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
......
......@@ -2364,6 +2364,7 @@ void Assembler::sqrtsd(XMMRegister dst, Operand src) {
}
void Assembler::haddps(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(SSE3));
EnsureSpace ensure_space(this);
EMIT(0xF2);
EMIT(0x0F);
......
......@@ -1396,6 +1396,12 @@ class Assembler : public AssemblerBase {
void vrsqrtps(XMMRegister dst, Operand src) {
vinstr(0x52, dst, xmm0, src, kNone, k0F, kWIG);
}
void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vhaddps(dst, src1, Operand(src2));
}
void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
}
void vmovaps(XMMRegister dst, XMMRegister src) {
vps(0x28, dst, xmm0, Operand(src));
}
......
......@@ -904,6 +904,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current));
current++;
break;
case 0x7C:
AppendToBuffer("vhaddps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
default:
UnimplementedInstruction();
}
......
......@@ -44,6 +44,8 @@
V(pxor, 66, 0F, EF)
#define SSSE3_INSTRUCTION_LIST(V) \
V(phaddd, 66, 0F, 38, 02) \
V(phaddw, 66, 0F, 38, 01) \
V(pshufb, 66, 0F, 38, 00) \
V(psignb, 66, 0F, 38, 08) \
V(psignw, 66, 0F, 38, 09) \
......
......@@ -3949,6 +3949,7 @@ void Assembler::sqrtsd(XMMRegister dst, Operand src) {
}
void Assembler::haddps(XMMRegister dst, XMMRegister src) {
DCHECK(IsEnabled(SSE3));
EnsureSpace ensure_space(this);
emit(0xF2);
emit_optional_rex_32(dst, src);
......@@ -3958,6 +3959,7 @@ void Assembler::haddps(XMMRegister dst, XMMRegister src) {
}
void Assembler::haddps(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(SSE3));
EnsureSpace ensure_space(this);
emit(0xF2);
emit_optional_rex_32(dst, src);
......
......@@ -1460,6 +1460,7 @@ class Assembler : public AssemblerBase {
AVX_P_3(vor, 0x56);
AVX_P_3(vxor, 0x57);
AVX_3(vcvtsd2ss, 0x5a, vsd);
AVX_3(vhaddps, 0x7c, vsd);
#undef AVX_3
#undef AVX_S_3
......
......@@ -1156,6 +1156,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
AppendToBuffer("vlddqu %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x7C:
AppendToBuffer("vhaddps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
default:
UnimplementedInstruction();
}
......@@ -1980,7 +1985,7 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("haddps %s,", NameOfXMMRegister(regop));
current += PrintRightOperand(current);
current += PrintRightXMMOperand(current);
} else {
UnimplementedInstruction();
}
......
......@@ -521,7 +521,6 @@ TEST(AssemblerIa32SSE) {
__ mulps(xmm2, xmm1);
__ subps(xmm2, xmm0);
__ divps(xmm2, xmm1);
__ haddps(xmm1, xmm0);
__ cvttss2si(eax, xmm2);
__ ret(0);
}
......@@ -539,6 +538,38 @@ TEST(AssemblerIa32SSE) {
CHECK_EQ(2, f(1.0, 2.0));
}
TEST(AssemblerIa32SSE3) {
CcTest::InitializeVM();
if (!CpuFeatures::IsSupported(SSE3)) return;
Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
HandleScope scope(isolate);
v8::internal::byte buffer[256];
MacroAssembler assm(isolate, buffer, sizeof(buffer),
v8::internal::CodeObjectRequired::kYes);
{
CpuFeatureScope fscope(&assm, SSE3);
__ movss(xmm0, Operand(esp, kPointerSize));
__ movss(xmm1, Operand(esp, 2 * kPointerSize));
__ shufps(xmm0, xmm0, 0x0);
__ shufps(xmm1, xmm1, 0x0);
__ haddps(xmm1, xmm0);
__ cvttss2si(eax, xmm1);
__ ret(0);
}
CodeDesc desc;
assm.GetCode(isolate, &desc);
Handle<Code> code =
isolate->factory()->NewCode(desc, Code::STUB, Handle<Code>());
#ifdef OBJECT_PRINT
OFStream os(stdout);
code->Print(os);
#endif
F8 f = FUNCTION_CAST<F8>(code->entry());
CHECK_EQ(4, f(1.0, 2.0));
}
typedef int (*F9)(double x, double y, double z);
TEST(AssemblerX64FMA_sd) {
......
......@@ -908,7 +908,6 @@ TEST(AssemblerX64SSE) {
__ subps(xmm2, xmm0);
__ divps(xmm2, xmm1);
__ cvttss2si(rax, xmm2);
__ haddps(xmm1, xmm0);
__ ret(0);
}
......@@ -925,6 +924,36 @@ TEST(AssemblerX64SSE) {
CHECK_EQ(2, f(1.0, 2.0));
}
TEST(AssemblerX64SSE3) {
CcTest::InitializeVM();
if (!CpuFeatures::IsSupported(SSE3)) return;
Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
HandleScope scope(isolate);
v8::internal::byte buffer[256];
MacroAssembler masm(isolate, buffer, sizeof(buffer),
v8::internal::CodeObjectRequired::kYes);
{
CpuFeatureScope fscope(&masm, SSE3);
__ shufps(xmm0, xmm0, 0x0); // brocast first argument
__ shufps(xmm1, xmm1, 0x0); // brocast second argument
__ haddps(xmm1, xmm0);
__ cvttss2si(rax, xmm1);
__ ret(0);
}
CodeDesc desc;
masm.GetCode(isolate, &desc);
Handle<Code> code =
isolate->factory()->NewCode(desc, Code::STUB, Handle<Code>());
#ifdef OBJECT_PRINT
OFStream os(stdout);
code->Print(os);
#endif
F6 f = FUNCTION_CAST<F6>(code->entry());
CHECK_EQ(4, f(1.0, 2.0));
}
typedef int (*F7)(double x, double y, double z);
TEST(AssemblerX64FMA_sd) {
......
......@@ -500,8 +500,6 @@ TEST(DisasmIa320) {
__ maxsd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ ucomisd(xmm0, xmm1);
__ cmpltsd(xmm0, xmm1);
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ andpd(xmm0, xmm1);
......@@ -551,6 +549,14 @@ TEST(DisasmIa320) {
__ cmov(greater, eax, Operand(edx, 3));
}
{
if (CpuFeatures::IsSupported(SSE3)) {
CpuFeatureScope scope(&assm, SSE3);
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(ebx, ecx, times_4, 10000));
}
}
#define EMIT_SSE34_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
......@@ -637,6 +643,8 @@ TEST(DisasmIa320) {
__ vmovaps(xmm0, xmm1);
__ vshufps(xmm0, xmm1, xmm2, 3);
__ vshufps(xmm0, xmm1, Operand(edx, 4), 3);
__ vhaddps(xmm0, xmm1, xmm2);
__ vhaddps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vcmpeqps(xmm5, xmm4, xmm1);
__ vcmpeqps(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
......
......@@ -475,8 +475,6 @@ TEST(DisasmX64) {
__ maxsd(xmm1, xmm0);
__ maxsd(xmm1, Operand(rbx, rcx, times_4, 10000));
__ ucomisd(xmm0, xmm1);
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(rbx, rcx, times_4, 10000));
__ andpd(xmm0, xmm1);
__ andpd(xmm0, Operand(rbx, rcx, times_4, 10000));
......@@ -530,6 +528,8 @@ TEST(DisasmX64) {
{
if (CpuFeatures::IsSupported(SSE3)) {
CpuFeatureScope scope(&assm, SSE3);
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(rbx, rcx, times_4, 10000));
__ lddqu(xmm1, Operand(rdx, 4));
}
}
......@@ -696,6 +696,8 @@ TEST(DisasmX64) {
__ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vxorps(xmm0, xmm1, xmm9);
__ vxorps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vhaddps(xmm0, xmm1, xmm9);
__ vhaddps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vandpd(xmm0, xmm9, xmm2);
__ vandpd(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment