Commit 65bcf6c2 authored by jing.bao's avatar jing.bao Committed by Commit Bot

[ia32][wasm] Add unpacking integer conversions

I32x4U/SConvertI16x8Low/High, I16x8U/SConvertI8x16Low/High

Add pmovsxbw/wd, pmovzxbw/wd and AVX version
Add Palignr, Pmovsxbw/wd, Pmovzxbw/wd macro.
Reconstruct ptest/vptest, Ptest using macro

Change-Id: I4a26b3bb6d5791f72e3192c914758287701e78a0
Reviewed-on: https://chromium-review.googlesource.com/1111691
Commit-Queue: Jing Bao <jing.bao@intel.com>
Reviewed-by: 's avatarBill Budge <bbudge@chromium.org>
Reviewed-by: 's avatarAseem Garg <aseemgarg@chromium.org>
Cr-Commit-Position: refs/heads/master@{#53990}
parent 24d8254e
......@@ -2010,6 +2010,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(2), i.InputInt8(1));
break;
}
case kIA32I32x4SConvertI16x8Low: {
__ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kIA32I32x4SConvertI16x8High: {
XMMRegister dst = i.OutputSimd128Register();
__ Palignr(dst, i.InputOperand(0), 8);
__ Pmovsxwd(dst, dst);
break;
}
case kIA32I32x4Neg: {
XMMRegister dst = i.OutputSimd128Register();
Operand src = i.InputOperand(0);
......@@ -2169,6 +2179,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
break;
}
case kIA32I32x4UConvertI16x8Low: {
__ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kIA32I32x4UConvertI16x8High: {
XMMRegister dst = i.OutputSimd128Register();
__ Palignr(dst, i.InputOperand(0), 8);
__ Pmovzxwd(dst, dst);
break;
}
case kSSEI32x4ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ psrld(i.OutputSimd128Register(), i.InputInt8(1));
......@@ -2267,6 +2287,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(2), i.InputInt8(1));
break;
}
case kIA32I16x8SConvertI8x16Low: {
__ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kIA32I16x8SConvertI8x16High: {
XMMRegister dst = i.OutputSimd128Register();
__ Palignr(dst, i.InputOperand(0), 8);
__ Pmovsxbw(dst, dst);
break;
}
case kIA32I16x8Neg: {
XMMRegister dst = i.OutputSimd128Register();
Operand src = i.InputOperand(0);
......@@ -2455,6 +2485,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
break;
}
case kIA32I16x8UConvertI8x16Low: {
__ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kIA32I16x8UConvertI8x16High: {
XMMRegister dst = i.OutputSimd128Register();
__ Palignr(dst, i.InputOperand(0), 8);
__ Pmovzxbw(dst, dst);
break;
}
case kSSEI16x8ShrU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
......
......@@ -154,6 +154,8 @@ namespace compiler {
V(IA32I32x4ExtractLane) \
V(SSEI32x4ReplaceLane) \
V(AVXI32x4ReplaceLane) \
V(IA32I32x4SConvertI16x8Low) \
V(IA32I32x4SConvertI16x8High) \
V(IA32I32x4Neg) \
V(SSEI32x4Shl) \
V(AVXI32x4Shl) \
......@@ -179,6 +181,8 @@ namespace compiler {
V(AVXI32x4GtS) \
V(SSEI32x4GeS) \
V(AVXI32x4GeS) \
V(IA32I32x4UConvertI16x8Low) \
V(IA32I32x4UConvertI16x8High) \
V(SSEI32x4ShrU) \
V(AVXI32x4ShrU) \
V(SSEI32x4MinU) \
......@@ -193,6 +197,8 @@ namespace compiler {
V(IA32I16x8ExtractLane) \
V(SSEI16x8ReplaceLane) \
V(AVXI16x8ReplaceLane) \
V(IA32I16x8SConvertI8x16Low) \
V(IA32I16x8SConvertI8x16High) \
V(IA32I16x8Neg) \
V(SSEI16x8Shl) \
V(AVXI16x8Shl) \
......@@ -224,6 +230,8 @@ namespace compiler {
V(AVXI16x8GtS) \
V(SSEI16x8GeS) \
V(AVXI16x8GeS) \
V(IA32I16x8UConvertI8x16Low) \
V(IA32I16x8UConvertI8x16High) \
V(SSEI16x8ShrU) \
V(AVXI16x8ShrU) \
V(SSEI16x8UConvertI32x4) \
......
......@@ -136,6 +136,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I32x4ExtractLane:
case kSSEI32x4ReplaceLane:
case kAVXI32x4ReplaceLane:
case kIA32I32x4SConvertI16x8Low:
case kIA32I32x4SConvertI16x8High:
case kIA32I32x4Neg:
case kSSEI32x4Shl:
case kAVXI32x4Shl:
......@@ -161,6 +163,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4GtS:
case kSSEI32x4GeS:
case kAVXI32x4GeS:
case kIA32I32x4UConvertI16x8Low:
case kIA32I32x4UConvertI16x8High:
case kSSEI32x4ShrU:
case kAVXI32x4ShrU:
case kSSEI32x4MinU:
......@@ -175,6 +179,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I16x8ExtractLane:
case kSSEI16x8ReplaceLane:
case kAVXI16x8ReplaceLane:
case kIA32I16x8SConvertI8x16Low:
case kIA32I16x8SConvertI8x16High:
case kIA32I16x8Neg:
case kSSEI16x8Shl:
case kAVXI16x8Shl:
......@@ -206,6 +212,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI16x8GtS:
case kSSEI16x8GeS:
case kAVXI16x8GeS:
case kIA32I16x8UConvertI8x16Low:
case kIA32I16x8UConvertI8x16High:
case kSSEI16x8ShrU:
case kAVXI16x8ShrU:
case kSSEI16x8UConvertI32x4:
......
......@@ -1794,12 +1794,20 @@ VISIT_ATOMIC_BINOP(Xor)
V(S128Or) \
V(S128Xor)
#define SIMD_UNOP_LIST(V) \
V(F32x4SConvertI32x4) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(I32x4Neg) \
V(I16x8Neg) \
#define SIMD_UNOP_LIST(V) \
V(F32x4SConvertI32x4) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(I32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High) \
V(I32x4Neg) \
V(I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High) \
V(I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High) \
V(I16x8Neg) \
V(I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High) \
V(I8x16Neg)
#define SIMD_UNOP_PREFIX_LIST(V) \
......
......@@ -2404,7 +2404,7 @@ void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
UNIMPLEMENTED();
}
......@@ -2437,11 +2437,6 @@ void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI16x8SConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
......
......@@ -2598,16 +2598,6 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
EMIT(imm8);
}
void Assembler::ptest(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
EMIT(0x66);
EMIT(0x0F);
EMIT(0x38);
EMIT(0x17);
emit_sse_operand(dst, src);
}
void Assembler::psllw(XMMRegister reg, int8_t shift) {
EnsureSpace ensure_space(this);
EMIT(0x66);
......
......@@ -1109,9 +1109,6 @@ class Assembler : public AssemblerBase {
void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
void extractps(Register dst, XMMRegister src, byte imm8);
void ptest(XMMRegister dst, XMMRegister src) { ptest(dst, Operand(src)); }
void ptest(XMMRegister dst, Operand src);
void psllw(XMMRegister reg, int8_t shift);
void pslld(XMMRegister reg, int8_t shift);
void psrlw(XMMRegister reg, int8_t shift);
......@@ -1438,10 +1435,6 @@ class Assembler : public AssemblerBase {
}
void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
void vptest(XMMRegister dst, XMMRegister src) { vptest(dst, Operand(src)); }
void vptest(XMMRegister dst, Operand src) {
vinstr(0x17, dst, xmm0, src, k66, k0F38, kWIG);
}
void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8);
void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8);
void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8);
......@@ -1701,6 +1694,7 @@ class Assembler : public AssemblerBase {
}
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
#undef DECLARE_SSE4_INSTRUCTION
#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
......@@ -1716,6 +1710,18 @@ class Assembler : public AssemblerBase {
SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION
#define DECLARE_SSE4_AVX_RM_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void v##instruction(XMMRegister dst, XMMRegister src) { \
v##instruction(dst, Operand(src)); \
} \
void v##instruction(XMMRegister dst, Operand src) { \
vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
}
SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
#undef DECLARE_SSE4_AVX_RM_INSTRUCTION
// Prefetch src position into cache level.
// Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
// non-temporal
......
......@@ -735,10 +735,6 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x17:
AppendToBuffer("vptest %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x99:
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
......@@ -816,6 +812,16 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
SSSE3_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
SSE4_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
#undef DECLARE_SSE_AVX_DIS_CASE
#define DECLARE_SSE_AVX_RM_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, \
opcode) \
case 0x##opcode: { \
AppendToBuffer("v" #instruction " %s,", NameOfXMMRegister(regop)); \
current += PrintRightXMMOperand(current); \
break; \
}
SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE_AVX_RM_DIS_CASE)
#undef DECLARE_SSE_AVX_RM_DIS_CASE
default:
UnimplementedInstruction();
}
......@@ -1947,10 +1953,6 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
switch (op) {
case 0x17:
AppendToBuffer("ptest %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
break;
#define SSE34_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
case 0x##opcode: { \
AppendToBuffer(#instruction " %s,", NameOfXMMRegister(regop)); \
......@@ -1960,6 +1962,7 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
SSSE3_INSTRUCTION_LIST(SSE34_DIS_CASE)
SSE4_INSTRUCTION_LIST(SSE34_DIS_CASE)
SSE4_RM_INSTRUCTION_LIST(SSE34_DIS_CASE)
#undef SSE34_DIS_CASE
default:
UnimplementedInstruction();
......
......@@ -1330,20 +1330,6 @@ void TurboAssembler::Psignd(XMMRegister dst, Operand src) {
UNREACHABLE();
}
void TurboAssembler::Ptest(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vptest(dst, src);
return;
}
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
ptest(dst, src);
return;
}
UNREACHABLE();
}
void TurboAssembler::Pshufb(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
......@@ -1372,6 +1358,20 @@ void TurboAssembler::Pblendw(XMMRegister dst, Operand src, uint8_t imm8) {
UNREACHABLE();
}
void TurboAssembler::Palignr(XMMRegister dst, Operand src, uint8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpalignr(dst, dst, src, imm8);
return;
}
if (CpuFeatures::IsSupported(SSSE3)) {
CpuFeatureScope sse_scope(this, SSSE3);
palignr(dst, src, imm8);
return;
}
UNREACHABLE();
}
void TurboAssembler::Pextrb(Register dst, XMMRegister src, int8_t imm8) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
......
......@@ -284,9 +284,34 @@ class TurboAssembler : public TurboAssemblerBase {
#undef AVX_OP3_XO
#undef AVX_OP3_WITH_TYPE
// Non-SSE2 instructions.
void Ptest(XMMRegister dst, XMMRegister src) { Ptest(dst, Operand(src)); }
void Ptest(XMMRegister dst, Operand src);
// Non-SSE2 instructions.
#define AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
sse_scope) \
void macro_name(dst_type dst, src_type src) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, src); \
return; \
} \
if (CpuFeatures::IsSupported(sse_scope)) { \
CpuFeatureScope scope(this, sse_scope); \
name(dst, src); \
return; \
} \
UNREACHABLE(); \
}
#define AVX_OP2_XO_SSE4(macro_name, name) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, XMMRegister, SSE4_1) \
AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, SSE4_1)
AVX_OP2_XO_SSE4(Ptest, ptest)
AVX_OP2_XO_SSE4(Pmovsxbw, pmovsxbw)
AVX_OP2_XO_SSE4(Pmovsxwd, pmovsxwd)
AVX_OP2_XO_SSE4(Pmovzxbw, pmovzxbw)
AVX_OP2_XO_SSE4(Pmovzxwd, pmovzxwd)
#undef AVX_OP2_WITH_TYPE_SCOPE
#undef AVX_OP2_XO_SSE4
void Pshufb(XMMRegister dst, XMMRegister src) { Pshufb(dst, Operand(src)); }
void Pshufb(XMMRegister dst, Operand src);
......@@ -302,6 +327,11 @@ class TurboAssembler : public TurboAssemblerBase {
void Psignd(XMMRegister dst, XMMRegister src) { Psignd(dst, Operand(src)); }
void Psignd(XMMRegister dst, Operand src);
void Palignr(XMMRegister dst, XMMRegister src, uint8_t imm8) {
Palignr(dst, Operand(src), imm8);
}
void Palignr(XMMRegister dst, Operand src, uint8_t imm8);
void Pextrb(Register dst, XMMRegister src, int8_t imm8);
void Pextrw(Register dst, XMMRegister src, int8_t imm8);
void Pextrd(Register dst, XMMRegister src, int8_t imm8);
......
......@@ -72,4 +72,11 @@
V(pmaxud, 66, 0F, 38, 3F) \
V(pmulld, 66, 0F, 38, 40)
#define SSE4_RM_INSTRUCTION_LIST(V) \
V(pmovsxbw, 66, 0F, 38, 20) \
V(pmovsxwd, 66, 0F, 38, 23) \
V(pmovzxbw, 66, 0F, 38, 30) \
V(pmovzxwd, 66, 0F, 38, 33) \
V(ptest, 66, 0F, 38, 17)
#endif // V8_IA32_SSE_INSTR_H_
......@@ -572,10 +572,9 @@ TEST(DisasmIa320) {
__ pinsrd(xmm1, eax, 0);
__ pinsrd(xmm1, Operand(edx, 4), 0);
__ extractps(eax, xmm1, 0);
__ ptest(xmm5, xmm1);
__ ptest(xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
SSE4_RM_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
}
}
#undef EMIT_SSE34_INSTR
......@@ -666,9 +665,6 @@ TEST(DisasmIa320) {
__ vmaxpd(xmm0, xmm1, xmm2);
__ vmaxpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vptest(xmm5, xmm1);
__ vptest(xmm5, Operand(edx, 4));
__ vpsllw(xmm0, xmm7, 21);
__ vpslld(xmm0, xmm7, 21);
__ vpsrlw(xmm0, xmm7, 21);
......@@ -727,6 +723,14 @@ TEST(DisasmIa320) {
SSSE3_INSTRUCTION_LIST(EMIT_SSE34_AVXINSTR)
SSE4_INSTRUCTION_LIST(EMIT_SSE34_AVXINSTR)
#undef EMIT_SSE34_AVXINSTR
#define EMIT_SSE4_RM_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3, \
notUsed4) \
__ v##instruction(xmm5, xmm1); \
__ v##instruction(xmm5, Operand(edx, 4));
SSE4_RM_INSTRUCTION_LIST(EMIT_SSE4_RM_AVXINSTR)
#undef EMIT_SSE4_RM_AVXINSTR
}
}
......
......@@ -884,7 +884,11 @@ WASM_SIMD_TEST(I32x4ConvertF32x4) {
CHECK_EQ(1, r.Call(*i, signed_value, unsigned_value));
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion from I16x8 (unpacking).
WASM_SIMD_TEST(I32x4ConvertI16x8) {
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(execution_mode,
......@@ -926,7 +930,7 @@ WASM_SIMD_TEST(I32x4ConvertI16x8) {
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
void RunI32x4UnOpTest(WasmExecutionMode execution_mode, LowerSimd lower_simd,
WasmOpcode simd_op, Int32UnOp expected_op) {
......@@ -1106,7 +1110,7 @@ WASM_SIMD_TEST(I32x4ShrU) {
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion from I8x16 (unpacking).
WASM_SIMD_TEST(I16x8ConvertI8x16) {
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(execution_mode,
......@@ -1150,7 +1154,7 @@ WASM_SIMD_TEST(I16x8ConvertI8x16) {
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
void RunI16x8UnOpTest(WasmExecutionMode execution_mode, LowerSimd lower_simd,
WasmOpcode simd_op, Int16UnOp expected_op) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment