Commit ab032855 authored by jiepan, committed by V8 LUCI CQ

[x64] Implement 256-bit assembler for SSSE3/SSE4/SSE42_AVX instructions

Bug: v8:12228
Change-Id: I32efb46cd71d494de4d40301224724b41ad035a9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3250410
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Jie Pan <jie.pan@intel.com>
Cr-Commit-Position: refs/heads/main@{#77602}
parent 6f66a832
@@ -1134,6 +1134,14 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
} \
void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) { \
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
} \
void v##instruction(YMMRegister dst, YMMRegister src1, YMMRegister src2) { \
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0, \
AVX2); \
} \
void v##instruction(YMMRegister dst, YMMRegister src1, Operand src2) { \
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0, \
AVX2); \
}
SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
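The new YMM overloads forward to vinstr with the same opcode/prefix/escape arguments as the existing XMM versions and additionally pass AVX2 as the required CPU feature. As a rough illustration only (assuming the pshufb entry of SSSE3_INSTRUCTION_LIST is (pshufb, 66, 0F, 38, 00), which is not shown in this diff), the expansion for that entry would read roughly as:

// Hand-written expansion sketch, not a literal preprocessor dump.
void vpshufb(YMMRegister dst, YMMRegister src1, YMMRegister src2) {
  vinstr(0x00, dst, src1, src2, k66, k0F38, kW0, AVX2);
}
void vpshufb(YMMRegister dst, YMMRegister src1, Operand src2) {
  vinstr(0x00, dst, src1, src2, k66, k0F38, kW0, AVX2);
}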
@@ -2703,6 +2703,24 @@ TEST(AssemblerX64Integer256bit) {
__ vpsraw(ymm7, ymm1, xmm4);
__ vpsllq(ymm3, ymm2, xmm1);
// SSSE3_AVX_INSTRUCTION
__ vpshufb(ymm1, ymm2, ymm3);
__ vphaddw(ymm8, ymm9, Operand(rbx, rcx, times_4, 10000));
__ vpmaddubsw(ymm5, ymm7, ymm9);
__ vpsignd(ymm7, ymm0, ymm1);
__ vpmulhrsw(ymm4, ymm3, ymm1);
// SSE4_AVX_INSTRUCTION
__ vpmuldq(ymm1, ymm5, ymm6);
__ vpcmpeqq(ymm0, ymm2, ymm3);
__ vpackusdw(ymm4, ymm2, ymm0);
__ vpminud(ymm8, ymm9, Operand(rbx, rcx, times_4, 10000));
__ vpmaxsb(ymm3, ymm4, ymm7);
__ vpmulld(ymm6, ymm5, ymm3);
// SSE4_2_AVX_INSTRUCTION
__ vpcmpgtq(ymm3, ymm2, ymm0);
CodeDesc desc;
masm.GetCode(isolate, &desc);
#ifdef OBJECT_PRINT
@@ -2728,7 +2746,37 @@ TEST(AssemblerX64Integer256bit) {
// vpsraw ymm7, ymm1, xmm4
0xC5, 0xF5, 0xE1, 0xFC,
// vpsllq ymm3, ymm2, xmm1
0xC5, 0xED, 0xF3, 0xD9};
0xC5, 0xED, 0xF3, 0xD9,
// SSSE3_AVX_INSTRUCTION
// vpshufb ymm1, ymm2, ymm3
0xC4, 0xE2, 0x6D, 0x00, 0xCB,
// vphaddw ymm8, ymm9, YMMWORD PTR [rbx+rcx*4+0x2710]
0xC4, 0x62, 0x35, 0x01, 0x84, 0x8B, 0x10, 0x27, 0x00, 0x00,
// vpmaddubsw ymm5, ymm7, ymm9
0xC4, 0xC2, 0x45, 0x04, 0xE9,
// vpsignd ymm7, ymm0, ymm1
0xC4, 0xE2, 0x7D, 0x0A, 0xF9,
// vpmulhrsw ymm4, ymm3, ymm1
0xC4, 0xE2, 0x65, 0x0B, 0xE1,
// SSE4_AVX_INSTRUCTION
// vpmuldq ymm1, ymm5, ymm6
0xC4, 0xE2, 0x55, 0x28, 0xCE,
// vpcmpeqq ymm0, ymm2, ymm3
0xC4, 0xE2, 0x6D, 0x29, 0xC3,
// vpackusdw ymm4, ymm2, ymm0
0xC4, 0xE2, 0x6D, 0x2B, 0xE0,
// vpminud ymm8, ymm9, YMMWORD PTR [rbx+rcx*4+0x2710]
0xC4, 0x62, 0x35, 0x3B, 0x84, 0x8B, 0x10, 0x27, 0x0, 0x0,
// vpmaxsb ymm3, ymm4, ymm7
0xC4, 0xE2, 0x5D, 0x3C, 0xDF,
// vpmulld ymm6, ymm5, ymm3
0xC4, 0xE2, 0x55, 0x40, 0xF3,
// SSE4_2_AVX_INSTRUCTION
// vpcmpgtq ymm3, ymm2, ymm0
0xC4, 0xE2, 0x6D, 0x37, 0xD8};
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}
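The expected bytes above are the raw VEX encodings of the instructions emitted in the first test hunk. As a sanity check on where, for example, 0xC4 0xE2 0x6D 0x00 0xCB (vpshufb ymm1, ymm2, ymm3) comes from, the following standalone sketch hand-assembles the three-byte VEX prefix for that one register-register case; it is independent of V8 and simply follows the Intel SDM layout for VEX.256.66.0F38.WIG 00 /r.

#include <cstdint>
#include <cstdio>

int main() {
  // vpshufb ymm1, ymm2, ymm3  ==  VEX.256.66.0F38.WIG 00 /r
  uint8_t dst = 1, src1 = 2, src2 = 3;  // ymm1, ymm2, ymm3
  uint8_t vex0 = 0xC4;                  // three-byte VEX escape
  // P0: R, X, B (bit-inverted; all set because no register is ymm8..ymm15),
  //     then m-mmmm = 0b00010 selecting the 0F 38 opcode map.
  uint8_t vex1 = (0b111 << 5) | 0b00010;                             // 0xE2
  // P1: W = 0, vvvv = ~src1 (the non-destructive source), L = 1 (256-bit),
  //     pp = 01 (implied 0x66 prefix).
  uint8_t vex2 = (0 << 7) | ((~src1 & 0xF) << 3) | (1 << 2) | 0b01;  // 0x6D
  uint8_t opcode = 0x00;
  // ModRM: mod = 11 (register direct), reg = dst, rm = src2.
  uint8_t modrm = (0b11 << 6) | (dst << 3) | src2;                   // 0xCB
  printf("%02X %02X %02X %02X %02X\n", vex0, vex1, vex2, opcode, modrm);
  return 0;
}

The memory-operand forms (e.g. vphaddw ymm8, ymm9, YMMWORD PTR [rbx+rcx*4+0x2710]) differ only in the inverted R/vvvv bits for the high registers and in the ModRM/SIB/displacement tail that follows the opcode.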