Commit 36ddd075, authored by jing.bao, committed by Commit Bot

Add phaddd/vphaddd, phaddw/vphaddw and vhaddps to ia32

Also add vhaddps to x64
Fix haddps for SSE3 scope and disassembler on ia32/x64

Change-Id: If511e6428fa1ce034b4281943dfee1405c9d4ffc
Reviewed-on: https://chromium-review.googlesource.com/939265
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#51642}
parent a4b615eb
...@@ -2169,6 +2169,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2169,6 +2169,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64F32x4AddHoriz: { case kX64F32x4AddHoriz: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE3);
__ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
......
...@@ -2364,6 +2364,7 @@ void Assembler::sqrtsd(XMMRegister dst, Operand src) { ...@@ -2364,6 +2364,7 @@ void Assembler::sqrtsd(XMMRegister dst, Operand src) {
} }
void Assembler::haddps(XMMRegister dst, Operand src) { void Assembler::haddps(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(SSE3));
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
EMIT(0xF2); EMIT(0xF2);
EMIT(0x0F); EMIT(0x0F);
......
...@@ -1396,6 +1396,12 @@ class Assembler : public AssemblerBase { ...@@ -1396,6 +1396,12 @@ class Assembler : public AssemblerBase {
void vrsqrtps(XMMRegister dst, Operand src) { void vrsqrtps(XMMRegister dst, Operand src) {
vinstr(0x52, dst, xmm0, src, kNone, k0F, kWIG); vinstr(0x52, dst, xmm0, src, kNone, k0F, kWIG);
} }
// Register-register overload of vhaddps: wraps src2 in an Operand and
// forwards to the (XMMRegister, XMMRegister, Operand) overload.
void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vhaddps(dst, src1, Operand(src2));
}
// Register-operand overload of vhaddps: emits the instruction through vinstr
// with opcode byte 0x7C and the kF2 / k0F / kWIG encoding selectors.
void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
}
void vmovaps(XMMRegister dst, XMMRegister src) { void vmovaps(XMMRegister dst, XMMRegister src) {
vps(0x28, dst, xmm0, Operand(src)); vps(0x28, dst, xmm0, Operand(src));
} }
......
...@@ -904,6 +904,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) { ...@@ -904,6 +904,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current)); AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current));
current++; current++;
break; break;
case 0x7C:
AppendToBuffer("vhaddps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
default: default:
UnimplementedInstruction(); UnimplementedInstruction();
} }
......
...@@ -44,6 +44,8 @@ ...@@ -44,6 +44,8 @@
V(pxor, 66, 0F, EF) V(pxor, 66, 0F, EF)
#define SSSE3_INSTRUCTION_LIST(V) \ #define SSSE3_INSTRUCTION_LIST(V) \
V(phaddd, 66, 0F, 38, 02) \
V(phaddw, 66, 0F, 38, 01) \
V(pshufb, 66, 0F, 38, 00) \ V(pshufb, 66, 0F, 38, 00) \
V(psignb, 66, 0F, 38, 08) \ V(psignb, 66, 0F, 38, 08) \
V(psignw, 66, 0F, 38, 09) \ V(psignw, 66, 0F, 38, 09) \
......
...@@ -3949,6 +3949,7 @@ void Assembler::sqrtsd(XMMRegister dst, Operand src) { ...@@ -3949,6 +3949,7 @@ void Assembler::sqrtsd(XMMRegister dst, Operand src) {
} }
void Assembler::haddps(XMMRegister dst, XMMRegister src) { void Assembler::haddps(XMMRegister dst, XMMRegister src) {
DCHECK(IsEnabled(SSE3));
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
emit(0xF2); emit(0xF2);
emit_optional_rex_32(dst, src); emit_optional_rex_32(dst, src);
...@@ -3958,6 +3959,7 @@ void Assembler::haddps(XMMRegister dst, XMMRegister src) { ...@@ -3958,6 +3959,7 @@ void Assembler::haddps(XMMRegister dst, XMMRegister src) {
} }
void Assembler::haddps(XMMRegister dst, Operand src) { void Assembler::haddps(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(SSE3));
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
emit(0xF2); emit(0xF2);
emit_optional_rex_32(dst, src); emit_optional_rex_32(dst, src);
......
...@@ -1460,6 +1460,7 @@ class Assembler : public AssemblerBase { ...@@ -1460,6 +1460,7 @@ class Assembler : public AssemblerBase {
AVX_P_3(vor, 0x56); AVX_P_3(vor, 0x56);
AVX_P_3(vxor, 0x57); AVX_P_3(vxor, 0x57);
AVX_3(vcvtsd2ss, 0x5a, vsd); AVX_3(vcvtsd2ss, 0x5a, vsd);
AVX_3(vhaddps, 0x7c, vsd);
#undef AVX_3 #undef AVX_3
#undef AVX_S_3 #undef AVX_S_3
......
...@@ -1156,6 +1156,11 @@ int DisassemblerX64::AVXInstruction(byte* data) { ...@@ -1156,6 +1156,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
AppendToBuffer("vlddqu %s,", NameOfXMMRegister(regop)); AppendToBuffer("vlddqu %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current); current += PrintRightXMMOperand(current);
break; break;
case 0x7C:
AppendToBuffer("vhaddps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
default: default:
UnimplementedInstruction(); UnimplementedInstruction();
} }
...@@ -1980,7 +1985,7 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { ...@@ -1980,7 +1985,7 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
int mod, regop, rm; int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm); get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("haddps %s,", NameOfXMMRegister(regop)); AppendToBuffer("haddps %s,", NameOfXMMRegister(regop));
current += PrintRightOperand(current); current += PrintRightXMMOperand(current);
} else { } else {
UnimplementedInstruction(); UnimplementedInstruction();
} }
......
...@@ -521,7 +521,6 @@ TEST(AssemblerIa32SSE) { ...@@ -521,7 +521,6 @@ TEST(AssemblerIa32SSE) {
__ mulps(xmm2, xmm1); __ mulps(xmm2, xmm1);
__ subps(xmm2, xmm0); __ subps(xmm2, xmm0);
__ divps(xmm2, xmm1); __ divps(xmm2, xmm1);
__ haddps(xmm1, xmm0);
__ cvttss2si(eax, xmm2); __ cvttss2si(eax, xmm2);
__ ret(0); __ ret(0);
} }
...@@ -539,6 +538,38 @@ TEST(AssemblerIa32SSE) { ...@@ -539,6 +538,38 @@ TEST(AssemblerIa32SSE) {
CHECK_EQ(2, f(1.0, 2.0)); CHECK_EQ(2, f(1.0, 2.0));
} }
// Assembles a small routine that uses haddps under an SSE3 feature scope,
// runs it, and checks the integer result. Skipped when SSE3 is unavailable.
TEST(AssemblerIa32SSE3) {
CcTest::InitializeVM();
// Nothing to test on hardware without SSE3.
if (!CpuFeatures::IsSupported(SSE3)) return;
Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
HandleScope scope(isolate);
v8::internal::byte buffer[256];
MacroAssembler assm(isolate, buffer, sizeof(buffer),
v8::internal::CodeObjectRequired::kYes);
{
// haddps asserts IsEnabled(SSE3), so it must be emitted inside this scope.
CpuFeatureScope fscope(&assm, SSE3);
// Load the values at esp + kPointerSize and esp + 2 * kPointerSize
// (presumably the two arguments of F8 -- confirm against its typedef).
__ movss(xmm0, Operand(esp, kPointerSize));
__ movss(xmm1, Operand(esp, 2 * kPointerSize));
// Immediate 0x0 replicates lane 0 into every lane of each register.
__ shufps(xmm0, xmm0, 0x0);
__ shufps(xmm1, xmm1, 0x0);
// Horizontal add; lane 0 of xmm1 becomes the sum of its first two lanes.
__ haddps(xmm1, xmm0);
// Return lane 0 of xmm1, truncated to an integer, in eax.
__ cvttss2si(eax, xmm1);
__ ret(0);
}
CodeDesc desc;
assm.GetCode(isolate, &desc);
Handle<Code> code =
isolate->factory()->NewCode(desc, Code::STUB, Handle<Code>());
#ifdef OBJECT_PRINT
OFStream os(stdout);
code->Print(os);
#endif
F8 f = FUNCTION_CAST<F8>(code->entry());
// With the second argument (2.0) in every lane of xmm1, lane 0 after
// haddps is 2.0 + 2.0 = 4.
CHECK_EQ(4, f(1.0, 2.0));
}
typedef int (*F9)(double x, double y, double z); typedef int (*F9)(double x, double y, double z);
TEST(AssemblerX64FMA_sd) { TEST(AssemblerX64FMA_sd) {
......
...@@ -908,7 +908,6 @@ TEST(AssemblerX64SSE) { ...@@ -908,7 +908,6 @@ TEST(AssemblerX64SSE) {
__ subps(xmm2, xmm0); __ subps(xmm2, xmm0);
__ divps(xmm2, xmm1); __ divps(xmm2, xmm1);
__ cvttss2si(rax, xmm2); __ cvttss2si(rax, xmm2);
__ haddps(xmm1, xmm0);
__ ret(0); __ ret(0);
} }
...@@ -925,6 +924,36 @@ TEST(AssemblerX64SSE) { ...@@ -925,6 +924,36 @@ TEST(AssemblerX64SSE) {
CHECK_EQ(2, f(1.0, 2.0)); CHECK_EQ(2, f(1.0, 2.0));
} }
// Assembles a small routine that uses haddps under an SSE3 feature scope,
// runs it, and checks the integer result. Skipped when SSE3 is unavailable.
TEST(AssemblerX64SSE3) {
CcTest::InitializeVM();
// Nothing to test on hardware without SSE3.
if (!CpuFeatures::IsSupported(SSE3)) return;
Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
HandleScope scope(isolate);
v8::internal::byte buffer[256];
MacroAssembler masm(isolate, buffer, sizeof(buffer),
v8::internal::CodeObjectRequired::kYes);
{
// haddps asserts IsEnabled(SSE3), so it must be emitted inside this scope.
CpuFeatureScope fscope(&masm, SSE3);
__ shufps(xmm0, xmm0, 0x0); // broadcast first argument
__ shufps(xmm1, xmm1, 0x0); // broadcast second argument
// Horizontal add; lane 0 of xmm1 becomes the sum of its first two lanes.
__ haddps(xmm1, xmm0);
// Return lane 0 of xmm1, truncated to an integer, in rax.
__ cvttss2si(rax, xmm1);
__ ret(0);
}
CodeDesc desc;
masm.GetCode(isolate, &desc);
Handle<Code> code =
isolate->factory()->NewCode(desc, Code::STUB, Handle<Code>());
#ifdef OBJECT_PRINT
OFStream os(stdout);
code->Print(os);
#endif
F6 f = FUNCTION_CAST<F6>(code->entry());
// With the second argument (2.0) in every lane of xmm1, lane 0 after
// haddps is 2.0 + 2.0 = 4.
CHECK_EQ(4, f(1.0, 2.0));
}
typedef int (*F7)(double x, double y, double z); typedef int (*F7)(double x, double y, double z);
TEST(AssemblerX64FMA_sd) { TEST(AssemblerX64FMA_sd) {
......
...@@ -500,8 +500,6 @@ TEST(DisasmIa320) { ...@@ -500,8 +500,6 @@ TEST(DisasmIa320) {
__ maxsd(xmm1, Operand(ebx, ecx, times_4, 10000)); __ maxsd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ ucomisd(xmm0, xmm1); __ ucomisd(xmm0, xmm1);
__ cmpltsd(xmm0, xmm1); __ cmpltsd(xmm0, xmm1);
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ andpd(xmm0, xmm1); __ andpd(xmm0, xmm1);
...@@ -551,6 +549,14 @@ TEST(DisasmIa320) { ...@@ -551,6 +549,14 @@ TEST(DisasmIa320) {
__ cmov(greater, eax, Operand(edx, 3)); __ cmov(greater, eax, Operand(edx, 3));
} }
{
if (CpuFeatures::IsSupported(SSE3)) {
CpuFeatureScope scope(&assm, SSE3);
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(ebx, ecx, times_4, 10000));
}
}
#define EMIT_SSE34_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \ #define EMIT_SSE34_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
__ instruction(xmm5, xmm1); \ __ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4)); __ instruction(xmm5, Operand(edx, 4));
...@@ -637,6 +643,8 @@ TEST(DisasmIa320) { ...@@ -637,6 +643,8 @@ TEST(DisasmIa320) {
__ vmovaps(xmm0, xmm1); __ vmovaps(xmm0, xmm1);
__ vshufps(xmm0, xmm1, xmm2, 3); __ vshufps(xmm0, xmm1, xmm2, 3);
__ vshufps(xmm0, xmm1, Operand(edx, 4), 3); __ vshufps(xmm0, xmm1, Operand(edx, 4), 3);
__ vhaddps(xmm0, xmm1, xmm2);
__ vhaddps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vcmpeqps(xmm5, xmm4, xmm1); __ vcmpeqps(xmm5, xmm4, xmm1);
__ vcmpeqps(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000)); __ vcmpeqps(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
......
...@@ -475,8 +475,6 @@ TEST(DisasmX64) { ...@@ -475,8 +475,6 @@ TEST(DisasmX64) {
__ maxsd(xmm1, xmm0); __ maxsd(xmm1, xmm0);
__ maxsd(xmm1, Operand(rbx, rcx, times_4, 10000)); __ maxsd(xmm1, Operand(rbx, rcx, times_4, 10000));
__ ucomisd(xmm0, xmm1); __ ucomisd(xmm0, xmm1);
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(rbx, rcx, times_4, 10000));
__ andpd(xmm0, xmm1); __ andpd(xmm0, xmm1);
__ andpd(xmm0, Operand(rbx, rcx, times_4, 10000)); __ andpd(xmm0, Operand(rbx, rcx, times_4, 10000));
...@@ -530,6 +528,8 @@ TEST(DisasmX64) { ...@@ -530,6 +528,8 @@ TEST(DisasmX64) {
{ {
if (CpuFeatures::IsSupported(SSE3)) { if (CpuFeatures::IsSupported(SSE3)) {
CpuFeatureScope scope(&assm, SSE3); CpuFeatureScope scope(&assm, SSE3);
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(rbx, rcx, times_4, 10000));
__ lddqu(xmm1, Operand(rdx, 4)); __ lddqu(xmm1, Operand(rdx, 4));
} }
} }
...@@ -696,6 +696,8 @@ TEST(DisasmX64) { ...@@ -696,6 +696,8 @@ TEST(DisasmX64) {
__ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000)); __ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vxorps(xmm0, xmm1, xmm9); __ vxorps(xmm0, xmm1, xmm9);
__ vxorps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000)); __ vxorps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vhaddps(xmm0, xmm1, xmm9);
__ vhaddps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vandpd(xmm0, xmm9, xmm2); __ vandpd(xmm0, xmm9, xmm2);
__ vandpd(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000)); __ vandpd(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment