Commit e151c660 authored by weiliang.lin, committed by Commit bot

[x64] supplement SSE instructions for SIMD.js

Both legacy and AVX versions

BUG=

Review-Url: https://codereview.chromium.org/2328843003
Cr-Commit-Position: refs/heads/master@{#39327}
parent bf85ca53
......@@ -1760,6 +1760,7 @@ v8_source_set("v8_base") {
"src/x64/interface-descriptors-x64.cc",
"src/x64/macro-assembler-x64.cc",
"src/x64/macro-assembler-x64.h",
"src/x64/sse-instr.h",
]
} else if (v8_current_cpu == "arm") {
sources += [
......
......@@ -553,6 +553,7 @@ DEFINE_IMPLICATION(trace_opt_verbose, trace_opt)
DEFINE_BOOL(debug_code, false, "generate extra code (assertions) for debugging")
DEFINE_BOOL(code_comments, false, "emit comments in code disassembly")
DEFINE_BOOL(enable_sse3, true, "enable use of SSE3 instructions if available")
DEFINE_BOOL(enable_ssse3, true, "enable use of SSSE3 instructions if available")
DEFINE_BOOL(enable_sse4_1, true,
"enable use of SSE4.1 instructions if available")
DEFINE_BOOL(enable_sahf, true,
......
......@@ -730,6 +730,7 @@ struct AccessorDescriptor {
enum CpuFeature {
// x86
SSE4_1,
SSSE3,
SSE3,
SAHF,
AVX,
......
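The new --enable_ssse3 flag and the SSSE3 CpuFeature bit only take effect once they are consulted during CPU probing. That wiring is in one of the collapsed diffs; a minimal sketch of the presumed change to CpuFeatures::ProbeImpl in src/x64/assembler-x64.cc, mirroring the existing SSE3/SSE4.1 handling (a sketch under that assumption, not the literal patch contents):

// Presumed wiring: record SSSE3 support only if the CPU reports it and the
// flag allows it, using the enum value as the bit index in the feature mask.
void CpuFeatures::ProbeImpl(bool cross_compile) {
  base::CPU cpu;
  // ... unrelated probes elided ...
  if (cpu.has_sse41() && FLAG_enable_sse4_1) supported_ |= 1u << SSE4_1;
  if (cpu.has_ssse3() && FLAG_enable_ssse3) supported_ |= 1u << SSSE3;  // new
  if (cpu.has_sse3() && FLAG_enable_sse3) supported_ |= 1u << SSE3;
  // ... unrelated probes elided ...
}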
......@@ -1583,6 +1583,7 @@
'x64/interface-descriptors-x64.cc',
'x64/macro-assembler-x64.cc',
'x64/macro-assembler-x64.h',
'x64/sse-instr.h',
'debug/x64/debug-x64.cc',
'full-codegen/x64/full-codegen-x64.cc',
'ic/x64/access-compiler-x64.cc',
......
Three further file diffs are collapsed in this view; the new header src/x64/sse-instr.h (added to the build files above) is shown in full below.
// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_SSE_INSTR_H_
#define V8_SSE_INSTR_H_
#define SSE2_INSTRUCTION_LIST(V) \
V(packsswb, 66, 0F, 63) \
V(packssdw, 66, 0F, 6B) \
V(packuswb, 66, 0F, 67) \
V(paddb, 66, 0F, FC) \
V(paddw, 66, 0F, FD) \
V(paddd, 66, 0F, FE) \
V(paddsb, 66, 0F, EC) \
V(paddsw, 66, 0F, ED) \
V(paddusb, 66, 0F, DC) \
V(paddusw, 66, 0F, DD) \
V(pcmpeqb, 66, 0F, 74) \
V(pcmpeqw, 66, 0F, 75) \
V(pcmpeqd, 66, 0F, 76) \
V(pcmpgtb, 66, 0F, 64) \
V(pcmpgtw, 66, 0F, 65) \
V(pcmpgtd, 66, 0F, 66) \
V(pmaxsw, 66, 0F, EE) \
V(pmaxub, 66, 0F, DE) \
V(pminsw, 66, 0F, EA) \
V(pminub, 66, 0F, DA) \
V(pmullw, 66, 0F, D5) \
V(pmuludq, 66, 0F, F4) \
V(psllw, 66, 0F, F1) \
V(pslld, 66, 0F, F2) \
V(psraw, 66, 0F, E1) \
V(psrad, 66, 0F, E2) \
V(psrlw, 66, 0F, D1) \
V(psrld, 66, 0F, D2) \
V(psubb, 66, 0F, F8) \
V(psubw, 66, 0F, F9) \
V(psubd, 66, 0F, FA) \
V(psubsb, 66, 0F, E8) \
V(psubsw, 66, 0F, E9) \
V(psubusb, 66, 0F, D8) \
V(psubusw, 66, 0F, D9) \
V(pxor, 66, 0F, EF) \
V(cvtps2dq, 66, 0F, 5B)
#define SSSE3_INSTRUCTION_LIST(V) \
V(pabsb, 66, 0F, 38, 1C) \
V(pabsw, 66, 0F, 38, 1D) \
V(pabsd, 66, 0F, 38, 1E) \
V(pshufb, 66, 0F, 38, 00) \
V(psignb, 66, 0F, 38, 08) \
V(psignw, 66, 0F, 38, 09) \
V(psignd, 66, 0F, 38, 0A)
#define SSE4_INSTRUCTION_LIST(V) \
V(packusdw, 66, 0F, 38, 2B) \
V(pminsb, 66, 0F, 38, 38) \
V(pminsd, 66, 0F, 38, 39) \
V(pminuw, 66, 0F, 38, 3A) \
V(pminud, 66, 0F, 38, 3B) \
V(pmaxsb, 66, 0F, 38, 3C) \
V(pmaxsd, 66, 0F, 38, 3D) \
V(pmaxuw, 66, 0F, 38, 3E) \
V(pmaxud, 66, 0F, 38, 3F) \
V(pmulld, 66, 0F, 38, 40) \
V(ptest, 66, 0F, 38, 17)
#endif // V8_SSE_INSTR_H_
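The header only defines the opcode tables: for each entry, the mnemonic, the mandatory prefix, the opcode escape byte(s), and the opcode itself. The collapsed assembler and disassembler diffs presumably consume these lists as X-macros. A sketch of the usual pattern for the SSE2 list follows; the declaration macro name and the sse2_instr helper are illustrative assumptions, not the literal patch contents:

// Sketch: each table row expands into a register/register and a
// register/memory emitter that encodes "66 0F <opcode> /r".
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }                                                                   \
  void instruction(XMMRegister dst, const Operand& src) {             \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION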
......@@ -50,7 +50,7 @@ TEST(DisasmX64) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
v8::internal::byte buffer[4096];
v8::internal::byte buffer[8192];
Assembler assm(isolate, buffer, sizeof buffer);
DummyStaticFunction(NULL); // just bloody use it (DELETE; debugging)
......@@ -420,7 +420,8 @@ TEST(DisasmX64) {
__ ucomiss(xmm0, xmm1);
__ ucomiss(xmm0, Operand(rbx, rcx, times_4, 10000));
}
// SSE 2 instructions
// SSE2 instructions
{
__ cvttsd2si(rdx, Operand(rbx, rcx, times_4, 10000));
__ cvttsd2si(rdx, xmm1);
......@@ -467,6 +468,13 @@ TEST(DisasmX64) {
__ punpckldq(xmm1, xmm11);
__ punpckldq(xmm5, Operand(rdx, 4));
__ punpckhdq(xmm8, xmm15);
#define EMIT_SSE2_INSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(rdx, 4));
SSE2_INSTRUCTION_LIST(EMIT_SSE2_INSTR)
#undef EMIT_SSE2_INSTR
}
// cmov.
......@@ -489,6 +497,24 @@ TEST(DisasmX64) {
__ cmovq(greater, rax, Operand(rdx, 3));
}
{
if (CpuFeatures::IsSupported(SSE3)) {
CpuFeatureScope scope(&assm, SSE3);
__ lddqu(xmm1, Operand(rdx, 4));
}
}
#define EMIT_SSE34_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(rdx, 4));
{
if (CpuFeatures::IsSupported(SSSE3)) {
CpuFeatureScope scope(&assm, SSSE3);
SSSE3_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
}
}
{
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope scope(&assm, SSE4_1);
......@@ -539,12 +565,10 @@ TEST(DisasmX64) {
__ movups(xmm5, xmm1);
__ movups(xmm5, Operand(rdx, 4));
__ movups(Operand(rdx, 4), xmm5);
__ paddd(xmm5, xmm1);
__ paddd(xmm5, Operand(rdx, 4));
__ psubd(xmm5, xmm1);
__ psubd(xmm5, Operand(rdx, 4));
__ pmulld(xmm5, xmm1);
__ pmulld(xmm5, Operand(rdx, 4));
__ pmullw(xmm5, xmm1);
__ pmullw(xmm5, Operand(rdx, 4));
__ pmuludq(xmm5, xmm1);
__ pmuludq(xmm5, Operand(rdx, 4));
__ psrldq(xmm5, 123);
......@@ -553,8 +577,11 @@ TEST(DisasmX64) {
__ cvtps2dq(xmm5, Operand(rdx, 4));
__ cvtdq2ps(xmm5, xmm1);
__ cvtdq2ps(xmm5, Operand(rdx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
}
}
#undef EMIT_SSE34_INSTR
// AVX instruction
{
......@@ -678,6 +705,41 @@ TEST(DisasmX64) {
__ vcmpnltpd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
__ vcmpnlepd(xmm5, xmm4, xmm1);
__ vcmpnlepd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ v##instruction(xmm10, xmm5, xmm1); \
__ v##instruction(xmm10, xmm5, Operand(rdx, 4));
#define EMIT_SSE34_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3, \
notUsed4) \
__ v##instruction(xmm10, xmm5, xmm1); \
__ v##instruction(xmm10, xmm5, Operand(rdx, 4));
SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR)
SSSE3_INSTRUCTION_LIST(EMIT_SSE34_AVXINSTR)
SSE4_INSTRUCTION_LIST(EMIT_SSE34_AVXINSTR)
#undef EMIT_SSE2_AVXINSTR
#undef EMIT_SSE34_AVXINSTR
__ vlddqu(xmm1, Operand(rbx, rcx, times_4, 10000));
__ vpsllw(xmm0, xmm15, 21);
__ vpsrlw(xmm0, xmm15, 21);
__ vpsraw(xmm0, xmm15, 21);
__ vpsrad(xmm0, xmm15, 21);
__ vpextrb(rax, xmm2, 12);
__ vpextrb(Operand(rbx, rcx, times_4, 10000), xmm2, 12);
__ vpextrw(rax, xmm2, 5);
__ vpextrw(Operand(rbx, rcx, times_4, 10000), xmm2, 5);
__ vpextrd(rax, xmm2, 2);
__ vpextrd(Operand(rbx, rcx, times_4, 10000), xmm2, 2);
__ vpinsrb(xmm1, xmm2, rax, 12);
__ vpinsrb(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 12);
__ vpinsrw(xmm1, xmm2, rax, 5);
__ vpinsrw(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 5);
__ vpinsrd(xmm1, xmm2, rax, 2);
__ vpinsrd(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 2);
__ vpshufd(xmm1, xmm2, 85);
}
}
......
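For reference, the test macros above drive both encodings from the same tables. Taking the single table entry V(paddd, 66, 0F, FE): EMIT_SSE2_INSTR produces the legacy two-operand pair in the SSE2 block, while EMIT_SSE2_AVXINSTR prepends "v" and uses the three-operand AVX form, roughly expanding to:

// Expansion sketch for one entry inside the AVX-guarded block (the vpaddd
// overloads are part of the AVX support this commit adds):
__ vpaddd(xmm10, xmm5, xmm1);
__ vpaddd(xmm10, xmm5, Operand(rdx, 4));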