Commit 0233cb6c authored by Yolanda Chen, committed by V8 LUCI CQ

[x64] Implement 256-bit assembly for vmovddup/vmovshdup

Bug: v8:12228
Change-Id: I49b2e1a1c837b96ea2e7cb58f42314109845b7fc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3263766
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Yolanda Chen <yolanda.chen@intel.com>
Cr-Commit-Position: refs/heads/main@{#77746}
parent 7b785f33
...@@ -3416,30 +3416,33 @@ void Assembler::pmovmskb(Register dst, XMMRegister src) { ...@@ -3416,30 +3416,33 @@ void Assembler::pmovmskb(Register dst, XMMRegister src) {
} }
// AVX instructions // AVX instructions
#define VMOV_DUP(SIMDRegister, length) \
void Assembler::vmovddup(XMMRegister dst, XMMRegister src) { void Assembler::vmovddup(SIMDRegister dst, SIMDRegister src) { \
DCHECK(IsEnabled(AVX)); DCHECK(IsEnabled(AVX)); \
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this); \
emit_vex_prefix(dst, xmm0, src, kL128, kF2, k0F, kWIG); emit_vex_prefix(dst, xmm0, src, k##length, kF2, k0F, kWIG); \
emit(0x12); emit(0x12); \
emit_sse_operand(dst, src); emit_sse_operand(dst, src); \
} } \
\
void Assembler::vmovddup(XMMRegister dst, Operand src) { void Assembler::vmovddup(SIMDRegister dst, Operand src) { \
DCHECK(IsEnabled(AVX)); DCHECK(IsEnabled(AVX)); \
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this); \
emit_vex_prefix(dst, xmm0, src, kL128, kF2, k0F, kWIG); emit_vex_prefix(dst, xmm0, src, k##length, kF2, k0F, kWIG); \
emit(0x12); emit(0x12); \
emit_sse_operand(dst, src); emit_sse_operand(dst, src); \
} } \
\
void Assembler::vmovshdup(XMMRegister dst, XMMRegister src) { void Assembler::vmovshdup(SIMDRegister dst, SIMDRegister src) { \
DCHECK(IsEnabled(AVX)); DCHECK(IsEnabled(AVX)); \
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this); \
emit_vex_prefix(dst, xmm0, src, kL128, kF3, k0F, kWIG); emit_vex_prefix(dst, xmm0, src, k##length, kF3, k0F, kWIG); \
emit(0x16); emit(0x16); \
emit_sse_operand(dst, src); emit_sse_operand(dst, src); \
} }
VMOV_DUP(XMMRegister, L128)
VMOV_DUP(YMMRegister, L256)
#undef VMOV_DUP
#define BROADCASTSS(SIMDRegister, length) \ #define BROADCASTSS(SIMDRegister, length) \
void Assembler::vbroadcastss(SIMDRegister dst, Operand src) { \ void Assembler::vbroadcastss(SIMDRegister dst, Operand src) { \
......
...@@ -1329,7 +1329,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1329,7 +1329,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// AVX instruction // AVX instruction
void vmovddup(XMMRegister dst, XMMRegister src); void vmovddup(XMMRegister dst, XMMRegister src);
void vmovddup(XMMRegister dst, Operand src); void vmovddup(XMMRegister dst, Operand src);
void vmovddup(YMMRegister dst, YMMRegister src);
void vmovddup(YMMRegister dst, Operand src);
void vmovshdup(XMMRegister dst, XMMRegister src); void vmovshdup(XMMRegister dst, XMMRegister src);
void vmovshdup(YMMRegister dst, YMMRegister src);
void vbroadcastss(XMMRegister dst, Operand src); void vbroadcastss(XMMRegister dst, Operand src);
void vbroadcastss(XMMRegister dst, XMMRegister src); void vbroadcastss(XMMRegister dst, XMMRegister src);
void vbroadcastss(YMMRegister dst, Operand src); void vbroadcastss(YMMRegister dst, Operand src);
......
...@@ -2537,6 +2537,9 @@ TEST(AssemblerX64Regmove256bit) { ...@@ -2537,6 +2537,9 @@ TEST(AssemblerX64Regmove256bit) {
__ vmovdqu(ymm9, Operand(rbx, rcx, times_4, 10000)); __ vmovdqu(ymm9, Operand(rbx, rcx, times_4, 10000));
__ vmovdqu(Operand(rbx, rcx, times_4, 10000), ymm0); __ vmovdqu(Operand(rbx, rcx, times_4, 10000), ymm0);
__ vbroadcastss(ymm7, Operand(rbx, rcx, times_4, 10000)); __ vbroadcastss(ymm7, Operand(rbx, rcx, times_4, 10000));
__ vmovddup(ymm3, ymm2);
__ vmovddup(ymm4, Operand(rbx, rcx, times_4, 10000));
__ vmovshdup(ymm1, ymm2);
CodeDesc desc; CodeDesc desc;
masm.GetCode(isolate, &desc); masm.GetCode(isolate, &desc);
...@@ -2562,8 +2565,15 @@ TEST(AssemblerX64Regmove256bit) { ...@@ -2562,8 +2565,15 @@ TEST(AssemblerX64Regmove256bit) {
0xC5, 0xFE, 0x7F, 0x84, 0x8B, 0x10, 0x27, 0x00, 0x00, 0xC5, 0xFE, 0x7F, 0x84, 0x8B, 0x10, 0x27, 0x00, 0x00,
// vbroadcastss ymm7, DWORD PTR [rbx+rcx*4+0x2710] // vbroadcastss ymm7, DWORD PTR [rbx+rcx*4+0x2710]
0xc4, 0xe2, 0x7d, 0x18, 0xbc, 0x8b, 0x10, 0x27, 0x00, 0xc4, 0xe2, 0x7d, 0x18, 0xbc, 0x8b, 0x10, 0x27, 0x00, 0x00,
0x00};
// vmovddup ymm3, ymm2
0xc5, 0xff, 0x12, 0xda,
// vmovddup ymm4, YMMWORD PTR [rbx+rcx*4+0x2710]
0xc5, 0xff, 0x12, 0xa4, 0x8b, 0x10, 0x27, 0x00, 0x00,
// vmovshdup ymm1, ymm2
0xc5, 0xfe, 0x16, 0xca};
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected))); CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
} }
......
...@@ -1411,6 +1411,10 @@ UNINITIALIZED_TEST(DisasmX64YMMRegister) { ...@@ -1411,6 +1411,10 @@ UNINITIALIZED_TEST(DisasmX64YMMRegister) {
vhaddps(ymm0, ymm1, Operand(rbx, rcx, times_4, 10000))); vhaddps(ymm0, ymm1, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c4e27d18bc8b10270000 vbroadcastss ymm7,[rbx+rcx*4+0x2710]", COMPARE("c4e27d18bc8b10270000 vbroadcastss ymm7,[rbx+rcx*4+0x2710]",
vbroadcastss(ymm7, Operand(rbx, rcx, times_4, 10000))); vbroadcastss(ymm7, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5ff12da vmovddup ymm3,ymm2", vmovddup(ymm3, ymm2));
COMPARE("c5ff12a48b10270000 vmovddup ymm4,[rbx+rcx*4+0x2710]",
vmovddup(ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5fe16ca vmovshdup ymm1,ymm2", vmovshdup(ymm1, ymm2));
} }
if (!CpuFeatures::IsSupported(AVX2)) return; if (!CpuFeatures::IsSupported(AVX2)) return;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment