Commit a01a02fe authored by jiepan, committed by V8 LUCI CQ

[x64] Implement 256-bit assembly for vmovdqa/u

The previous 256-bit implementation only supports
register-to-register moves; this patch adds memory
to/from register support (see the usage sketch
after the commit metadata below).

Bug: v8:12228
Change-Id: I3088bae47bacf13595c76506cdc016f01a31d8b6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3160296
Commit-Queue: Jie Pan <jie.pan@intel.com>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76891}
parent 1e60c9b5
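
Not part of the commit: a minimal usage sketch of the new overloads, written in the style of the cctest further below (the __ macro standing for masm., AVX enabled via CpuFeatureScope). The registers and addressing modes are arbitrary illustrations, not taken from the patch.

CpuFeatureScope fscope(&masm, AVX);
__ vmovdqa(ymm2, Operand(rax, 0));                // new: 32-byte aligned load
__ vmovdqu(ymm3, Operand(rsi, rdi, times_1, 0));  // new: 32-byte unaligned load
__ vmovdqu(Operand(rdx, 0), ymm3);                // new: 32-byte unaligned store
__ vmovdqa(ymm0, ymm2);                           // existing: register-to-register move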
@@ -3581,10 +3581,18 @@ void Assembler::vmovdqa(XMMRegister dst, XMMRegister src) {
emit_sse_operand(dst, src);
}
void Assembler::vmovdqa(YMMRegister dst, Operand src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, ymm0, src, kL256, k66, k0F, kWIG);
emit(0x6F);
emit_sse_operand(dst, src);
}
void Assembler::vmovdqa(YMMRegister dst, YMMRegister src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, xmm0, src, kL256, k66, k0F, kWIG);
emit_vex_prefix(dst, ymm0, src, kL256, k66, k0F, kWIG);
emit(0x6F);
emit_sse_operand(dst, src);
}
@@ -3613,10 +3621,26 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
emit_sse_operand(src, dst);
}
void Assembler::vmovdqu(YMMRegister dst, Operand src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, ymm0, src, kL256, kF3, k0F, kWIG);
emit(0x6F);
emit_sse_operand(dst, src);
}
void Assembler::vmovdqu(Operand dst, YMMRegister src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit_vex_prefix(src, ymm0, dst, kL256, kF3, k0F, kWIG);
emit(0x7F);
emit_sse_operand(src, dst);
}
void Assembler::vmovdqu(YMMRegister dst, YMMRegister src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit_vex_prefix(src, xmm0, dst, kL256, kF3, k0F, kWIG);
emit_vex_prefix(src, ymm0, dst, kL256, kF3, k0F, kWIG);
emit(0x7F);
emit_sse_operand(src, dst);
}
@@ -1350,10 +1350,13 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vmovsd(Operand dst, XMMRegister src) { vsd(0x11, src, xmm0, dst); }
void vmovdqa(XMMRegister dst, Operand src);
void vmovdqa(XMMRegister dst, XMMRegister src);
void vmovdqa(YMMRegister dst, Operand src);
void vmovdqa(YMMRegister dst, YMMRegister src);
void vmovdqu(XMMRegister dst, Operand src);
void vmovdqu(Operand dst, XMMRegister src);
void vmovdqu(XMMRegister dst, XMMRegister src);
void vmovdqu(YMMRegister dst, Operand src);
void vmovdqu(Operand dst, YMMRegister src);
void vmovdqu(YMMRegister dst, YMMRegister src);
void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
@@ -2532,7 +2532,10 @@ TEST(AssemblerX64Regmove256bit) {
CpuFeatureScope fscope(&masm, AVX);
__ vmovdqa(ymm0, ymm1);
__ vmovdqa(ymm4, Operand(rbx, rcx, times_4, 10000));
__ vmovdqu(ymm10, ymm11);
__ vmovdqu(ymm9, Operand(rbx, rcx, times_4, 10000));
__ vmovdqu(Operand(rbx, rcx, times_4, 10000), ymm0);
CodeDesc desc;
masm.GetCode(isolate, &desc);
@@ -2544,9 +2547,18 @@
#endif
byte expected[] = {// VMOVDQA
// vmovdqa ymm0,ymm1
0xC5, 0xFD, 0x6F, 0xC1,
// vmovdqa ymm4,YMMWORD PTR [rbx+rcx*4+0x2710]
0xC5, 0xFD, 0x6F, 0xA4, 0x8B, 0x10, 0x27, 0x00, 0x00,
// VMOVDQU
0xC4, 0x41, 0x7E, 0x7F, 0xDA};
// vmovdqu ymm10,ymm11
0xC4, 0x41, 0x7E, 0x7F, 0xDA,
// vmovdqu ymm9,YMMWORD PTR [rbx+rcx*4+0x2710]
0xC5, 0x7E, 0x6F, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00,
// vmovdqu YMMWORD PTR [rbx+rcx*4+0x2710],ymm0
0xC5, 0xFE, 0x7F, 0x84, 0x8B, 0x10, 0x27, 0x00, 0x00};
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}
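
Not part of the commit: the expected bytes above follow the VEX encoding that emit_vex_prefix produces. A minimal, self-contained sketch (assuming only the Intel SDM two-byte VEX layout, no V8 code) that decodes the first sequence, 0xC5 0xFD 0x6F 0xC1 (vmovdqa ymm0, ymm1):

#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t p = 0xFD;         // second byte of the 2-byte VEX prefix 0xC5 0xFD
  int r_inv = p >> 7;             // ~R = 1: ModRM.reg needs no REX extension (ymm0)
  int vvvv_inv = (p >> 3) & 0xF;  // ~vvvv = 0b1111: no second source register
  int L = (p >> 2) & 1;           // L = 1: 256-bit vector length (kL256)
  int pp = p & 0x3;               // pp = 0b01: implied 0x66 prefix (k66 -> movdqa)
  std::printf("~R=%d ~vvvv=%#x L=%d pp=%d\n", r_inv, vvvv_inv, L, pp);
  // Opcode 0x6F is the load form (0x7F is the store form used for the
  // memory-destination case); ModRM 0xC1 is mod=11, reg=000 (ymm0), rm=001 (ymm1).
  return 0;
}

The vmovdqu sequences differ mainly in pp=0b10 (the kF3 prefix), e.g. 0xFE instead of 0xFD in the final store above.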