Commit 6dedc6e9, authored by jing.bao and committed by V8 LUCI CQ

[x64] Implement 256-bit assembly for vpshufd/hw/lw

Bug: v8:12228
Change-Id: If771a7526f2567d68e676f069223ac16b7303884
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3167036
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/main@{#77000}
parent 0a5a7af6
......@@ -3688,6 +3688,17 @@ void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
emit_sse_operand(dst, src2);
}
// Emits a VEX-encoded instruction operating on YMM registers.
// DCHECKs that `feature` is enabled and is either AVX or AVX2, then writes,
// in order: the VEX prefix built from dst/src1/src2 with the 256-bit length
// flag (kL256), the opcode byte `op`, and the operand bytes produced by
// emit_sse_operand(dst, src2). No immediate is written here; callers that
// need one append it themselves.
void Assembler::vinstr(byte op, YMMRegister dst, YMMRegister src1,
YMMRegister src2, SIMDPrefix pp, LeadingOpcode m, VexW w,
CpuFeature feature) {
DCHECK(IsEnabled(feature));
DCHECK(feature == AVX || feature == AVX2);
// Reserve buffer room before any byte is written.
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kL256, pp, m, w);
emit(op);
emit_sse_operand(dst, src2);
}
void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
SIMDPrefix pp, LeadingOpcode m, VexW w,
CpuFeature feature) {
......@@ -3699,6 +3710,17 @@ void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
emit_sse_operand(dst, src2);
}
// Memory-source variant of the YMM vinstr emitter: `src2` is an Operand
// instead of a register.
// DCHECKs that `feature` is enabled and is either AVX or AVX2, then writes,
// in order: the VEX prefix with the 256-bit length flag (kL256), the opcode
// byte `op`, and the operand bytes produced by emit_sse_operand(dst, src2).
// No immediate is written here; callers that need one append it themselves.
void Assembler::vinstr(byte op, YMMRegister dst, YMMRegister src1, Operand src2,
SIMDPrefix pp, LeadingOpcode m, VexW w,
CpuFeature feature) {
DCHECK(IsEnabled(feature));
DCHECK(feature == AVX || feature == AVX2);
// Reserve buffer room before any byte is written.
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kL256, pp, m, w);
emit(op);
emit_sse_operand(dst, src2);
}
void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
DCHECK(IsEnabled(AVX));
......
......@@ -964,8 +964,14 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// Generic VEX instruction emitters. Each takes the opcode byte, the
// destination, two sources (the second either a register or a memory
// Operand), and the SIMD prefix / leading-opcode map / VEX.W selectors.
// The XMMRegister overloads default `feature` to AVX; the YMMRegister
// overloads default it to AVX2.
void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature feature = AVX);
void vinstr(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2,
SIMDPrefix pp, LeadingOpcode m, VexW w,
CpuFeature feature = AVX2);
void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature feature = AVX);
void vinstr(byte op, YMMRegister dst, YMMRegister src1, Operand src2,
SIMDPrefix pp, LeadingOpcode m, VexW w,
CpuFeature feature = AVX2);
// SSE instructions
void sse_instr(XMMRegister dst, XMMRegister src, byte escape, byte opcode);
......@@ -1662,24 +1668,48 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
emit(imm8);
}
// vpshufd ymm, ymm, imm8: emits opcode 0x70 with the 66 prefix in the 0F map
// via the YMM vinstr overload, then appends the immediate byte.
void vpshufd(YMMRegister dst, YMMRegister src, uint8_t imm8) {
// ymm0 is passed as src1; presumably a placeholder because the instruction
// has a single source operand (confirm against emit_vex_prefix).
vinstr(0x70, dst, ymm0, src, k66, k0F, kWIG);
emit(imm8);
}
// vpshufd xmm, mem, imm8: emits opcode 0x70 with the 66 prefix in the 0F map
// via the XMM/Operand vinstr overload, then appends the immediate byte.
void vpshufd(XMMRegister dst, Operand src, uint8_t imm8) {
// xmm0 is passed as src1; presumably a placeholder for the unused operand.
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
emit(imm8);
}
// vpshufd ymm, mem, imm8: emits opcode 0x70 with the 66 prefix in the 0F map
// via the YMM/Operand vinstr overload, then appends the immediate byte.
void vpshufd(YMMRegister dst, Operand src, uint8_t imm8) {
// ymm0 is passed as src1; presumably a placeholder for the unused operand.
vinstr(0x70, dst, ymm0, src, k66, k0F, kWIG);
emit(imm8);
}
// vpshuflw xmm, xmm, imm8: emits opcode 0x70 with the F2 prefix in the 0F map
// via the XMM vinstr overload, then appends the immediate byte.
void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
// xmm0 is passed as src1; presumably a placeholder for the unused operand.
vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
emit(imm8);
}
// vpshuflw ymm, ymm, imm8: emits opcode 0x70 with the F2 prefix in the 0F map
// via the YMM vinstr overload, then appends the immediate byte.
void vpshuflw(YMMRegister dst, YMMRegister src, uint8_t imm8) {
// ymm0 is passed as src1; presumably a placeholder for the unused operand.
vinstr(0x70, dst, ymm0, src, kF2, k0F, kWIG);
emit(imm8);
}
// vpshuflw xmm, mem, imm8: emits opcode 0x70 with the F2 prefix in the 0F map
// via the XMM/Operand vinstr overload, then appends the immediate byte.
void vpshuflw(XMMRegister dst, Operand src, uint8_t imm8) {
// xmm0 is passed as src1; presumably a placeholder for the unused operand.
vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
emit(imm8);
}
// vpshuflw ymm, mem, imm8: emits opcode 0x70 with the F2 prefix in the 0F map
// via the YMM/Operand vinstr overload, then appends the immediate byte.
void vpshuflw(YMMRegister dst, Operand src, uint8_t imm8) {
// ymm0 is passed as src1; presumably a placeholder for the unused operand.
vinstr(0x70, dst, ymm0, src, kF2, k0F, kWIG);
emit(imm8);
}
// vpshufhw xmm, xmm, imm8: emits opcode 0x70 with the F3 prefix in the 0F map
// via the XMM vinstr overload, then appends the immediate byte.
void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
// xmm0 is passed as src1; presumably a placeholder for the unused operand.
vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
emit(imm8);
}
// vpshufhw ymm, ymm, imm8: emits opcode 0x70 with the F3 prefix in the 0F map
// via the YMM vinstr overload, then appends the immediate byte.
void vpshufhw(YMMRegister dst, YMMRegister src, uint8_t imm8) {
// ymm0 is passed as src1; presumably a placeholder for the unused operand.
vinstr(0x70, dst, ymm0, src, kF3, k0F, kWIG);
emit(imm8);
}
// vpshufhw xmm, mem, imm8: emits opcode 0x70 with the F3 prefix in the 0F map
// via the XMM/Operand vinstr overload, then appends the immediate byte.
void vpshufhw(XMMRegister dst, Operand src, uint8_t imm8) {
  // Exactly one instruction is emitted, and it must use kF3: with kF2 the
  // same opcode encodes vpshuflw (see the vpshuflw overloads). xmm0 is passed
  // as src1, matching the register-source vpshufhw overload.
  vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
  emit(imm8);
}
// vpshufhw ymm, mem, imm8: emits opcode 0x70 with the F3 prefix in the 0F map
// via the YMM/Operand vinstr overload, then appends the immediate byte.
void vpshufhw(YMMRegister dst, Operand src, uint8_t imm8) {
// ymm0 is passed as src1; presumably a placeholder for the unused operand.
vinstr(0x70, dst, ymm0, src, kF3, k0F, kWIG);
emit(imm8);
}
......
......@@ -2562,6 +2562,47 @@ TEST(AssemblerX64Regmove256bit) {
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}
// Assembles the 256-bit vpshufd / vpshuflw / vpshufhw forms, each with a
// register source and a memory source, and compares the generated bytes with
// reference encodings. Skipped when the host lacks AVX2.
TEST(AssemblerX64Shuffle256bit) {
  if (!CpuFeatures::IsSupported(AVX2)) return;

  CcTest::InitializeVM();
  v8::HandleScope handle_scope(CcTest::isolate());
  auto assembler_buffer = AllocateAssemblerBuffer();
  Isolate* isolate = CcTest::i_isolate();
  Assembler masm(AssemblerOptions{}, assembler_buffer->CreateView());
  CpuFeatureScope avx2_scope(&masm, AVX2);

  // Every instruction below uses the same immediate (85 == 0x55) and, for the
  // memory forms, the same address [rbx + rcx*4 + 10000].
  const uint8_t shuffle_imm = 85;
  const Operand mem_src(rbx, rcx, times_4, 10000);

  __ vpshufd(ymm1, ymm2, shuffle_imm);
  __ vpshufd(ymm1, mem_src, shuffle_imm);
  __ vpshuflw(ymm9, ymm10, shuffle_imm);
  __ vpshuflw(ymm9, mem_src, shuffle_imm);
  __ vpshufhw(ymm1, ymm2, shuffle_imm);
  __ vpshufhw(ymm1, mem_src, shuffle_imm);

  CodeDesc code_desc;
  masm.GetCode(isolate, &code_desc);
#ifdef OBJECT_PRINT
  Handle<Code> code =
      Factory::CodeBuilder(isolate, code_desc, CodeKind::FOR_TESTING).Build();
  StdoutStream os;
  code->Print(os);
#endif

  byte expected_bytes[] = {
      // vpshufd ymm1, ymm2, 85
      0xC5, 0xFD, 0x70, 0xCA, 0x55,
      // vpshufd ymm1, YMMWORD PTR [rbx+rcx*4+0x2710], 85
      0xC5, 0xFD, 0x70, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x55,
      // vpshuflw ymm9, ymm10, 85
      0xC4, 0x41, 0x7F, 0x70, 0xCA, 0x55,
      // vpshuflw ymm9, YMMWORD PTR [rbx+rcx*4+0x2710], 85
      0xC5, 0x7F, 0x70, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x55,
      // vpshufhw ymm1, ymm2, 85
      0xC5, 0xFE, 0x70, 0xCA, 0x55,
      // vpshufhw ymm1, YMMWORD PTR [rbx+rcx*4+0x2710], 85
      0xC5, 0xFE, 0x70, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x55};
  CHECK_EQ(0,
           memcmp(expected_bytes, code_desc.buffer, sizeof(expected_bytes)));
}
TEST(AssemblerX64FloatingPoint256bit) {
if (!CpuFeatures::IsSupported(AVX)) return;
CcTest::InitializeVM();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment