Commit 584a3514 authored by jing.bao's avatar jing.bao Committed by Commit bot

[x64] Introduce BMI instructions.

BUG=v8:4015
LOG=n

Review URL: https://codereview.chromium.org/1040603002

Cr-Commit-Position: refs/heads/master@{#27648}
parent a326fd86
......@@ -52,22 +52,23 @@ namespace base {
#if !V8_LIBC_MSVCRT
// Executes the CPUID instruction for leaf |info_type| and stores the
// resulting eax, ebx, ecx and edx values in cpu_info[0..3], in that order.
// NOTE(review): each preprocessor branch below contains two asm statements
// writing the same four outputs. Only the second one loads 0 into ecx, and it
// overwrites whatever the first one stored. The first statement in each
// branch looks like the pre-change form that has been superseded -- confirm
// whether it should be dropped.
static V8_INLINE void __cpuid(int cpu_info[4], int info_type) {
// Clear ecx to align with __cpuid() of MSVC:
// https://msdn.microsoft.com/en-us/library/hskdteyh.aspx
#if defined(__i386__) && defined(__pic__)
// Make sure to preserve ebx, which contains the pointer
// to the GOT in case we're generating PIC.
// NOTE(review): this first statement passes only eax; ecx is not an input.
__asm__ volatile (
"mov %%ebx, %%edi\n\t"
"cpuid\n\t"
"xchg %%edi, %%ebx\n\t"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type)
);
// Same sequence with ecx explicitly set to 0. ebx is saved in edi before
// cpuid and swapped back afterwards, so the ebx result is read from edi.
__asm__ volatile(
"mov %%ebx, %%edi\n\t"
"cpuid\n\t"
"xchg %%edi, %%ebx\n\t"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
: "a"(info_type), "c"(0));
#else
// NOTE(review): this first statement passes only eax; ecx is not an input.
__asm__ volatile (
"cpuid \n\t"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type)
);
// Same instruction with ecx explicitly set to 0; ebx is read directly.
__asm__ volatile("cpuid \n\t"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
: "a"(info_type), "c"(0));
#endif // defined(__i386__) && defined(__pic__)
}
......@@ -325,6 +326,10 @@ CPU::CPU()
has_osxsave_(false),
has_avx_(false),
has_fma3_(false),
has_bmi1_(false),
has_bmi2_(false),
has_lzcnt_(false),
has_popcnt_(false),
has_idiva_(false),
has_neon_(false),
has_thumb2_(false),
......@@ -371,6 +376,7 @@ CPU::CPU()
has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
has_popcnt_ = (cpu_info[2] & 0x00800000) != 0;
has_osxsave_ = (cpu_info[2] & 0x08000000) != 0;
has_avx_ = (cpu_info[2] & 0x10000000) != 0;
has_fma3_ = (cpu_info[2] & 0x00001000) != 0;
......@@ -392,10 +398,13 @@ CPU::CPU()
}
}
#if V8_HOST_ARCH_IA32
// SAHF is always available in compat/legacy mode.
has_sahf_ = true;
#else
// There are separate feature flags for VEX-encoded GPR instructions.
if (num_ids >= 7) {
__cpuid(cpu_info, 7);
has_bmi1_ = (cpu_info[1] & 0x00000008) != 0;
has_bmi2_ = (cpu_info[1] & 0x00000100) != 0;
}
// Query extended IDs.
__cpuid(cpu_info, 0x80000000);
unsigned num_ext_ids = cpu_info[0];
......@@ -403,10 +412,10 @@ CPU::CPU()
// Interpret extended CPU feature information.
if (num_ext_ids > 0x80000000) {
__cpuid(cpu_info, 0x80000001);
has_lzcnt_ = (cpu_info[2] & 0x00000020) != 0;
// SAHF must be probed in long mode.
has_sahf_ = (cpu_info[2] & 0x00000001) != 0;
}
#endif
#elif V8_HOST_ARCH_ARM
......
......@@ -86,6 +86,10 @@ class CPU FINAL {
bool has_osxsave() const { return has_osxsave_; }
bool has_avx() const { return has_avx_; }
bool has_fma3() const { return has_fma3_; }
bool has_bmi1() const { return has_bmi1_; }
bool has_bmi2() const { return has_bmi2_; }
bool has_lzcnt() const { return has_lzcnt_; }
bool has_popcnt() const { return has_popcnt_; }
bool is_atom() const { return is_atom_; }
// arm features
......@@ -125,6 +129,10 @@ class CPU FINAL {
bool has_osxsave_;
bool has_avx_;
bool has_fma3_;
bool has_bmi1_;
bool has_bmi2_;
bool has_lzcnt_;
bool has_popcnt_;
bool has_idiva_;
bool has_neon_;
bool has_thumb2_;
......
......@@ -446,6 +446,11 @@ DEFINE_BOOL(enable_sahf, true,
"enable use of SAHF instruction if available (X64 only)")
DEFINE_BOOL(enable_avx, true, "enable use of AVX instructions if available")
DEFINE_BOOL(enable_fma3, true, "enable use of FMA3 instructions if available")
DEFINE_BOOL(enable_bmi1, true, "enable use of BMI1 instructions if available")
DEFINE_BOOL(enable_bmi2, true, "enable use of BMI2 instructions if available")
DEFINE_BOOL(enable_lzcnt, true, "enable use of LZCNT instruction if available")
DEFINE_BOOL(enable_popcnt, true,
"enable use of POPCNT instruction if available")
DEFINE_BOOL(enable_vfp3, ENABLE_VFP3_DEFAULT,
"enable use of VFP3 instructions if available")
DEFINE_BOOL(enable_armv7, ENABLE_ARMV7_DEFAULT,
......
......@@ -654,6 +654,10 @@ enum CpuFeature {
SAHF,
AVX,
FMA3,
BMI1,
BMI2,
LZCNT,
POPCNT,
ATOM,
// ARM
VFP3,
......
......@@ -234,6 +234,16 @@ void Assembler::emit_vex_prefix(XMMRegister reg, XMMRegister vreg,
}
// General-purpose-register overload: wraps the code of each Register in an
// XMMRegister and forwards everything to the XMMRegister overload, so the
// prefix bytes are produced by a single implementation.
void Assembler::emit_vex_prefix(Register reg, Register vreg, Register rm,
                                VectorLength l, SIMDPrefix pp,
                                LeadingOpcode mm, VexW w) {
  const XMMRegister reg_as_xmm = {reg.code()};
  const XMMRegister vreg_as_xmm = {vreg.code()};
  const XMMRegister rm_as_xmm = {rm.code()};
  emit_vex_prefix(reg_as_xmm, vreg_as_xmm, rm_as_xmm, l, pp, mm, w);
}
void Assembler::emit_vex_prefix(XMMRegister reg, XMMRegister vreg,
const Operand& rm, VectorLength l,
SIMDPrefix pp, LeadingOpcode mm, VexW w) {
......@@ -248,6 +258,15 @@ void Assembler::emit_vex_prefix(XMMRegister reg, XMMRegister vreg,
}
// General-purpose-register overload for a memory operand: wraps the codes of
// |reg| and |vreg| in XMMRegister values and forwards, together with the
// untouched Operand, to the XMMRegister/Operand overload.
void Assembler::emit_vex_prefix(Register reg, Register vreg, const Operand& rm,
                                VectorLength l, SIMDPrefix pp,
                                LeadingOpcode mm, VexW w) {
  const XMMRegister reg_as_xmm = {reg.code()};
  const XMMRegister vreg_as_xmm = {vreg.code()};
  emit_vex_prefix(reg_as_xmm, vreg_as_xmm, rm, l, pp, mm, w);
}
Address Assembler::target_address_at(Address pc,
ConstantPoolArray* constant_pool) {
return Memory::int32_at(pc) + pc + 4;
......
......@@ -90,6 +90,10 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
OSHasAVXSupport()) {
supported_ |= 1u << FMA3;
}
if (cpu.has_bmi1() && FLAG_enable_bmi1) supported_ |= 1u << BMI1;
if (cpu.has_bmi2() && FLAG_enable_bmi2) supported_ |= 1u << BMI2;
if (cpu.has_lzcnt() && FLAG_enable_lzcnt) supported_ |= 1u << LZCNT;
if (cpu.has_popcnt() && FLAG_enable_popcnt) supported_ |= 1u << POPCNT;
if (strcmp(FLAG_mcpu, "auto") == 0) {
if (cpu.is_atom()) supported_ |= 1u << ATOM;
} else if (strcmp(FLAG_mcpu, "atom") == 0) {
......@@ -100,10 +104,14 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
// Prints nothing: the body is empty.
void CpuFeatures::PrintTarget() { }
// Prints one line to stdout with the IsSupported() result (as 0/1) for each
// listed CPU feature.
// NOTE(review): two printf calls are present. The first omits BMI1, BMI2,
// LZCNT and POPCNT; the second prints the full list. The first looks like the
// pre-change form -- confirm whether it should be dropped so the line is not
// printed twice.
void CpuFeatures::PrintFeatures() {
printf("SSE3=%d SSE4_1=%d SAHF=%d AVX=%d FMA3=%d ATOM=%d\n",
CpuFeatures::IsSupported(SSE3), CpuFeatures::IsSupported(SSE4_1),
CpuFeatures::IsSupported(SAHF), CpuFeatures::IsSupported(AVX),
CpuFeatures::IsSupported(FMA3), CpuFeatures::IsSupported(ATOM));
// Full feature list, including the BMI1/BMI2/LZCNT/POPCNT flags.
printf(
"SSE3=%d SSE4_1=%d SAHF=%d AVX=%d FMA3=%d BMI1=%d BMI2=%d LZCNT=%d "
"POPCNT=%d ATOM=%d\n",
CpuFeatures::IsSupported(SSE3), CpuFeatures::IsSupported(SSE4_1),
CpuFeatures::IsSupported(SAHF), CpuFeatures::IsSupported(AVX),
CpuFeatures::IsSupported(FMA3), CpuFeatures::IsSupported(BMI1),
CpuFeatures::IsSupported(BMI2), CpuFeatures::IsSupported(LZCNT),
CpuFeatures::IsSupported(POPCNT), CpuFeatures::IsSupported(ATOM));
}
......@@ -3540,6 +3548,262 @@ void Assembler::vss(byte op, XMMRegister dst, XMMRegister src1,
}
// Shared emitter for VEX-encoded BMI1 instructions, 64-bit form (kW1), with a
// register as the rm operand. Requires BMI1 to be enabled (DCHECK).
// Emits: VEX prefix built from (reg, vreg, rm) with kLZ / kNone / k0F38,
// then the opcode byte |op|, then the ModRM byte for (reg, rm).
void Assembler::bmi1q(byte op, Register reg, Register vreg, Register rm) {
DCHECK(IsEnabled(BMI1));
EnsureSpace ensure_space(this);
emit_vex_prefix(reg, vreg, rm, kLZ, kNone, k0F38, kW1);
emit(op);
emit_modrm(reg, rm);
}
// Same as above, but |rm| is a memory operand, so the instruction ends with
// emit_operand(reg, rm) instead of a register ModRM byte.
void Assembler::bmi1q(byte op, Register reg, Register vreg, const Operand& rm) {
DCHECK(IsEnabled(BMI1));
EnsureSpace ensure_space(this);
emit_vex_prefix(reg, vreg, rm, kLZ, kNone, k0F38, kW1);
emit(op);
emit_operand(reg, rm);
}
// 32-bit form of the register variant: identical except that the VEX prefix
// is built with kW0 instead of kW1.
void Assembler::bmi1l(byte op, Register reg, Register vreg, Register rm) {
DCHECK(IsEnabled(BMI1));
EnsureSpace ensure_space(this);
emit_vex_prefix(reg, vreg, rm, kLZ, kNone, k0F38, kW0);
emit(op);
emit_modrm(reg, rm);
}
// 32-bit form (kW0) of the memory-operand variant.
void Assembler::bmi1l(byte op, Register reg, Register vreg, const Operand& rm) {
DCHECK(IsEnabled(BMI1));
EnsureSpace ensure_space(this);
emit_vex_prefix(reg, vreg, rm, kLZ, kNone, k0F38, kW0);
emit(op);
emit_operand(reg, rm);
}
// Emits tzcnt (64-bit, register source). Requires BMI1 to be enabled (DCHECK).
// Byte sequence: 0xF3, the prefix produced by emit_rex_64(dst, src), 0x0F,
// 0xBC, then the ModRM byte for (dst, src). The 0xF3 byte must come first.
void Assembler::tzcntq(Register dst, Register src) {
DCHECK(IsEnabled(BMI1));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_rex_64(dst, src);
emit(0x0F);
emit(0xBC);
emit_modrm(dst, src);
}
// Emits tzcnt (64-bit, memory source): same bytes as above, ending with
// emit_operand(dst, src) for the memory operand.
void Assembler::tzcntq(Register dst, const Operand& src) {
DCHECK(IsEnabled(BMI1));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_rex_64(dst, src);
emit(0x0F);
emit(0xBC);
emit_operand(dst, src);
}
// Emits tzcnt (32-bit, register source): like the 64-bit form, but the prefix
// after 0xF3 is whatever emit_optional_rex_32(dst, src) produces.
void Assembler::tzcntl(Register dst, Register src) {
DCHECK(IsEnabled(BMI1));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0xBC);
emit_modrm(dst, src);
}
// Emits tzcnt (32-bit, memory source).
void Assembler::tzcntl(Register dst, const Operand& src) {
DCHECK(IsEnabled(BMI1));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0xBC);
emit_operand(dst, src);
}
// Emits lzcnt (64-bit, register source). Requires LZCNT to be enabled
// (DCHECK). Byte sequence: 0xF3, the prefix produced by
// emit_rex_64(dst, src), 0x0F, 0xBD, then the ModRM byte for (dst, src).
void Assembler::lzcntq(Register dst, Register src) {
DCHECK(IsEnabled(LZCNT));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_rex_64(dst, src);
emit(0x0F);
emit(0xBD);
emit_modrm(dst, src);
}
// Emits lzcnt (64-bit, memory source): same bytes, ending with
// emit_operand(dst, src).
void Assembler::lzcntq(Register dst, const Operand& src) {
DCHECK(IsEnabled(LZCNT));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_rex_64(dst, src);
emit(0x0F);
emit(0xBD);
emit_operand(dst, src);
}
// Emits lzcnt (32-bit, register source): the prefix after 0xF3 is whatever
// emit_optional_rex_32(dst, src) produces.
void Assembler::lzcntl(Register dst, Register src) {
DCHECK(IsEnabled(LZCNT));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0xBD);
emit_modrm(dst, src);
}
// Emits lzcnt (32-bit, memory source).
void Assembler::lzcntl(Register dst, const Operand& src) {
DCHECK(IsEnabled(LZCNT));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0xBD);
emit_operand(dst, src);
}
// Emits popcnt (64-bit, register source). Requires POPCNT to be enabled
// (DCHECK). Byte sequence: 0xF3, the prefix produced by
// emit_rex_64(dst, src), 0x0F, 0xB8, then the ModRM byte for (dst, src).
void Assembler::popcntq(Register dst, Register src) {
DCHECK(IsEnabled(POPCNT));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_rex_64(dst, src);
emit(0x0F);
emit(0xB8);
emit_modrm(dst, src);
}
// Emits popcnt (64-bit, memory source): same bytes, ending with
// emit_operand(dst, src).
void Assembler::popcntq(Register dst, const Operand& src) {
DCHECK(IsEnabled(POPCNT));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_rex_64(dst, src);
emit(0x0F);
emit(0xB8);
emit_operand(dst, src);
}
// Emits popcnt (32-bit, register source): the prefix after 0xF3 is whatever
// emit_optional_rex_32(dst, src) produces.
void Assembler::popcntl(Register dst, Register src) {
DCHECK(IsEnabled(POPCNT));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0xB8);
emit_modrm(dst, src);
}
// Emits popcnt (32-bit, memory source).
void Assembler::popcntl(Register dst, const Operand& src) {
DCHECK(IsEnabled(POPCNT));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0xB8);
emit_operand(dst, src);
}
// Shared emitter for VEX-encoded BMI2 instructions, 64-bit form (kW1), with a
// register as the rm operand. Requires BMI2 to be enabled (DCHECK).
// Unlike the BMI1 emitters, the SIMD prefix selector |pp| is supplied by the
// caller. Emits: VEX prefix built from (reg, vreg, rm) with kLZ / |pp| /
// k0F38, then the opcode byte |op|, then the ModRM byte for (reg, rm).
void Assembler::bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg,
Register rm) {
DCHECK(IsEnabled(BMI2));
EnsureSpace ensure_space(this);
emit_vex_prefix(reg, vreg, rm, kLZ, pp, k0F38, kW1);
emit(op);
emit_modrm(reg, rm);
}
// Same as above, but |rm| is a memory operand, so the instruction ends with
// emit_operand(reg, rm).
void Assembler::bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg,
const Operand& rm) {
DCHECK(IsEnabled(BMI2));
EnsureSpace ensure_space(this);
emit_vex_prefix(reg, vreg, rm, kLZ, pp, k0F38, kW1);
emit(op);
emit_operand(reg, rm);
}
// 32-bit form of the register variant: identical except for kW0.
void Assembler::bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
Register rm) {
DCHECK(IsEnabled(BMI2));
EnsureSpace ensure_space(this);
emit_vex_prefix(reg, vreg, rm, kLZ, pp, k0F38, kW0);
emit(op);
emit_modrm(reg, rm);
}
// 32-bit form (kW0) of the memory-operand variant.
void Assembler::bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
const Operand& rm) {
DCHECK(IsEnabled(BMI2));
EnsureSpace ensure_space(this);
emit_vex_prefix(reg, vreg, rm, kLZ, pp, k0F38, kW0);
emit(op);
emit_operand(reg, rm);
}
// Emits rorx (64-bit, register source) with an immediate byte. Requires BMI2
// to be enabled (DCHECK). Emits: VEX prefix with kLZ / kF2 / k0F3A / kW1,
// opcode 0xF0, the ModRM byte for (dst, src), and finally |imm8|.
// The vreg slot of the prefix is filled with a register of code 0 because
// this instruction has no third register operand.
void Assembler::rorxq(Register dst, Register src, byte imm8) {
DCHECK(IsEnabled(BMI2));
DCHECK(is_uint8(imm8));
Register vreg = {0}; // VEX.vvvv unused
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, vreg, src, kLZ, kF2, k0F3A, kW1);
emit(0xF0);
emit_modrm(dst, src);
emit(imm8);
}
// Emits rorx (64-bit, memory source): same as above, with
// emit_operand(dst, src) for the memory operand before the immediate byte.
void Assembler::rorxq(Register dst, const Operand& src, byte imm8) {
DCHECK(IsEnabled(BMI2));
DCHECK(is_uint8(imm8));
Register vreg = {0}; // VEX.vvvv unused
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, vreg, src, kLZ, kF2, k0F3A, kW1);
emit(0xF0);
emit_operand(dst, src);
emit(imm8);
}
// Emits rorx (32-bit, register source): identical to rorxq except for kW0.
void Assembler::rorxl(Register dst, Register src, byte imm8) {
DCHECK(IsEnabled(BMI2));
DCHECK(is_uint8(imm8));
Register vreg = {0}; // VEX.vvvv unused
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, vreg, src, kLZ, kF2, k0F3A, kW0);
emit(0xF0);
emit_modrm(dst, src);
emit(imm8);
}
// Emits rorx (32-bit, memory source).
void Assembler::rorxl(Register dst, const Operand& src, byte imm8) {
DCHECK(IsEnabled(BMI2));
DCHECK(is_uint8(imm8));
Register vreg = {0}; // VEX.vvvv unused
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, vreg, src, kLZ, kF2, k0F3A, kW0);
emit(0xF0);
emit_operand(dst, src);
emit(imm8);
}
void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) {
Register ireg = { reg.code() };
emit_operand(ireg, adr);
......
......@@ -1405,6 +1405,183 @@ class Assembler : public AssemblerBase {
void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
// BMI instructions
// andn: forwards to the BMI1 emitter with opcode 0xF2. |dst| is passed as the
// reg operand, |src1| as the vreg operand and |src2| as the rm operand.
void andnq(Register dst, Register src1, Register src2) {
  const byte kAndnOpcode = 0xF2;
  bmi1q(kAndnOpcode, dst, src1, src2);
}
// 64-bit form with |src2| in memory.
void andnq(Register dst, Register src1, const Operand& src2) {
  const byte kAndnOpcode = 0xF2;
  bmi1q(kAndnOpcode, dst, src1, src2);
}
// 32-bit form, register operands.
void andnl(Register dst, Register src1, Register src2) {
  const byte kAndnOpcode = 0xF2;
  bmi1l(kAndnOpcode, dst, src1, src2);
}
// 32-bit form with |src2| in memory.
void andnl(Register dst, Register src1, const Operand& src2) {
  const byte kAndnOpcode = 0xF2;
  bmi1l(kAndnOpcode, dst, src1, src2);
}
// bextr: forwards to the BMI1 emitter with opcode 0xF7. Note the swapped
// order: |src2| is passed as the vreg operand and |src1| as the rm operand.
void bextrq(Register dst, Register src1, Register src2) {
  const byte kBextrOpcode = 0xF7;
  bmi1q(kBextrOpcode, dst, src2, src1);
}
// 64-bit form with |src1| in memory.
void bextrq(Register dst, const Operand& src1, Register src2) {
  const byte kBextrOpcode = 0xF7;
  bmi1q(kBextrOpcode, dst, src2, src1);
}
// 32-bit form, register operands.
void bextrl(Register dst, Register src1, Register src2) {
  const byte kBextrOpcode = 0xF7;
  bmi1l(kBextrOpcode, dst, src2, src1);
}
// 32-bit form with |src1| in memory.
void bextrl(Register dst, const Operand& src1, Register src2) {
  const byte kBextrOpcode = 0xF7;
  bmi1l(kBextrOpcode, dst, src2, src1);
}
// blsi: forwards to the BMI1 emitter with opcode 0xF3. The reg slot does not
// name a real operand here; it carries the fixed value 3, which selects blsi
// among the instructions sharing opcode 0xF3. |dst| goes in the vreg slot and
// |src| in the rm slot.
void blsiq(Register dst, Register src) {
  const Register opcode_ext = {3};
  bmi1q(0xF3, opcode_ext, dst, src);
}
// 64-bit form with |src| in memory.
void blsiq(Register dst, const Operand& src) {
  const Register opcode_ext = {3};
  bmi1q(0xF3, opcode_ext, dst, src);
}
// 32-bit form, register source.
void blsil(Register dst, Register src) {
  const Register opcode_ext = {3};
  bmi1l(0xF3, opcode_ext, dst, src);
}
// 32-bit form with |src| in memory.
void blsil(Register dst, const Operand& src) {
  const Register opcode_ext = {3};
  bmi1l(0xF3, opcode_ext, dst, src);
}
// blsmsk: forwards to the BMI1 emitter with opcode 0xF3. The reg slot carries
// the fixed value 2, which selects blsmsk among the instructions sharing
// opcode 0xF3. |dst| goes in the vreg slot and |src| in the rm slot.
void blsmskq(Register dst, Register src) {
  const Register opcode_ext = {2};
  bmi1q(0xF3, opcode_ext, dst, src);
}
// 64-bit form with |src| in memory.
void blsmskq(Register dst, const Operand& src) {
  const Register opcode_ext = {2};
  bmi1q(0xF3, opcode_ext, dst, src);
}
// 32-bit form, register source.
void blsmskl(Register dst, Register src) {
  const Register opcode_ext = {2};
  bmi1l(0xF3, opcode_ext, dst, src);
}
// 32-bit form with |src| in memory.
void blsmskl(Register dst, const Operand& src) {
  const Register opcode_ext = {2};
  bmi1l(0xF3, opcode_ext, dst, src);
}
// blsr: forwards to the BMI1 emitter with opcode 0xF3. The reg slot carries
// the fixed value 1, which selects blsr among the instructions sharing
// opcode 0xF3. |dst| goes in the vreg slot and |src| in the rm slot.
void blsrq(Register dst, Register src) {
  const Register opcode_ext = {1};
  bmi1q(0xF3, opcode_ext, dst, src);
}
// 64-bit form with |src| in memory.
void blsrq(Register dst, const Operand& src) {
  const Register opcode_ext = {1};
  bmi1q(0xF3, opcode_ext, dst, src);
}
// 32-bit form, register source.
void blsrl(Register dst, Register src) {
  const Register opcode_ext = {1};
  bmi1l(0xF3, opcode_ext, dst, src);
}
// 32-bit form with |src| in memory.
void blsrl(Register dst, const Operand& src) {
  const Register opcode_ext = {1};
  bmi1l(0xF3, opcode_ext, dst, src);
}
// Bit-count instructions defined out of line. The q/l suffix selects the
// 64-/32-bit form; each has a register-source and a memory-source overload.
// tzcnt*: the definitions assert that BMI1 is enabled.
void tzcntq(Register dst, Register src);
void tzcntq(Register dst, const Operand& src);
void tzcntl(Register dst, Register src);
void tzcntl(Register dst, const Operand& src);
// lzcnt*: the definitions assert that LZCNT is enabled.
void lzcntq(Register dst, Register src);
void lzcntq(Register dst, const Operand& src);
void lzcntl(Register dst, Register src);
void lzcntl(Register dst, const Operand& src);
// popcnt*: the definitions assert that POPCNT is enabled.
void popcntq(Register dst, Register src);
void popcntq(Register dst, const Operand& src);
void popcntl(Register dst, Register src);
void popcntl(Register dst, const Operand& src);
// bzhi: forwards to the BMI2 emitter with no SIMD prefix (kNone) and opcode
// 0xF5. Note the swapped order: |src2| is passed as the vreg operand and
// |src1| as the rm operand.
void bzhiq(Register dst, Register src1, Register src2) {
  const byte kBzhiOpcode = 0xF5;
  bmi2q(kNone, kBzhiOpcode, dst, src2, src1);
}
// 64-bit form with |src1| in memory.
void bzhiq(Register dst, const Operand& src1, Register src2) {
  const byte kBzhiOpcode = 0xF5;
  bmi2q(kNone, kBzhiOpcode, dst, src2, src1);
}
// 32-bit form, register operands.
void bzhil(Register dst, Register src1, Register src2) {
  const byte kBzhiOpcode = 0xF5;
  bmi2l(kNone, kBzhiOpcode, dst, src2, src1);
}
// 32-bit form with |src1| in memory.
void bzhil(Register dst, const Operand& src1, Register src2) {
  const byte kBzhiOpcode = 0xF5;
  bmi2l(kNone, kBzhiOpcode, dst, src2, src1);
}
// mulx: forwards to the BMI2 emitter with the kF2 prefix and opcode 0xF6.
// |dst1| is passed as the reg operand, |dst2| as the vreg operand and |src|
// as the rm operand.
void mulxq(Register dst1, Register dst2, Register src) {
  const byte kMulxOpcode = 0xF6;
  bmi2q(kF2, kMulxOpcode, dst1, dst2, src);
}
// 64-bit form with |src| in memory.
void mulxq(Register dst1, Register dst2, const Operand& src) {
  const byte kMulxOpcode = 0xF6;
  bmi2q(kF2, kMulxOpcode, dst1, dst2, src);
}
// 32-bit form, register source.
void mulxl(Register dst1, Register dst2, Register src) {
  const byte kMulxOpcode = 0xF6;
  bmi2l(kF2, kMulxOpcode, dst1, dst2, src);
}
// 32-bit form with |src| in memory.
void mulxl(Register dst1, Register dst2, const Operand& src) {
  const byte kMulxOpcode = 0xF6;
  bmi2l(kF2, kMulxOpcode, dst1, dst2, src);
}
// pdep: forwards to the BMI2 emitter with the kF2 prefix and opcode 0xF5.
// |dst| is passed as the reg operand, |src1| as the vreg operand and |src2|
// as the rm operand.
void pdepq(Register dst, Register src1, Register src2) {
  const byte kPdepOpcode = 0xF5;
  bmi2q(kF2, kPdepOpcode, dst, src1, src2);
}
// 64-bit form with |src2| in memory.
void pdepq(Register dst, Register src1, const Operand& src2) {
  const byte kPdepOpcode = 0xF5;
  bmi2q(kF2, kPdepOpcode, dst, src1, src2);
}
// 32-bit form, register operands.
void pdepl(Register dst, Register src1, Register src2) {
  const byte kPdepOpcode = 0xF5;
  bmi2l(kF2, kPdepOpcode, dst, src1, src2);
}
// 32-bit form with |src2| in memory.
void pdepl(Register dst, Register src1, const Operand& src2) {
  const byte kPdepOpcode = 0xF5;
  bmi2l(kF2, kPdepOpcode, dst, src1, src2);
}
// pext: forwards to the BMI2 emitter with the kF3 prefix and opcode 0xF5.
// |dst| is passed as the reg operand, |src1| as the vreg operand and |src2|
// as the rm operand.
void pextq(Register dst, Register src1, Register src2) {
  const byte kPextOpcode = 0xF5;
  bmi2q(kF3, kPextOpcode, dst, src1, src2);
}
// 64-bit form with |src2| in memory.
void pextq(Register dst, Register src1, const Operand& src2) {
  const byte kPextOpcode = 0xF5;
  bmi2q(kF3, kPextOpcode, dst, src1, src2);
}
// 32-bit form, register operands.
void pextl(Register dst, Register src1, Register src2) {
  const byte kPextOpcode = 0xF5;
  bmi2l(kF3, kPextOpcode, dst, src1, src2);
}
// 32-bit form with |src2| in memory.
void pextl(Register dst, Register src1, const Operand& src2) {
  const byte kPextOpcode = 0xF5;
  bmi2l(kF3, kPextOpcode, dst, src1, src2);
}
// sarx: forwards to the BMI2 emitter with the kF3 prefix and opcode 0xF7.
// Note the swapped order: |src2| is passed as the vreg operand and |src1| as
// the rm operand.
void sarxq(Register dst, Register src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2q(kF3, kShiftOpcode, dst, src2, src1);
}
// 64-bit form with |src1| in memory.
void sarxq(Register dst, const Operand& src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2q(kF3, kShiftOpcode, dst, src2, src1);
}
// 32-bit form, register operands.
void sarxl(Register dst, Register src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2l(kF3, kShiftOpcode, dst, src2, src1);
}
// 32-bit form with |src1| in memory.
void sarxl(Register dst, const Operand& src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2l(kF3, kShiftOpcode, dst, src2, src1);
}
// shlx: forwards to the BMI2 emitter with the k66 prefix and opcode 0xF7.
// Note the swapped order: |src2| is passed as the vreg operand and |src1| as
// the rm operand.
void shlxq(Register dst, Register src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2q(k66, kShiftOpcode, dst, src2, src1);
}
// 64-bit form with |src1| in memory.
void shlxq(Register dst, const Operand& src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2q(k66, kShiftOpcode, dst, src2, src1);
}
// 32-bit form, register operands.
void shlxl(Register dst, Register src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2l(k66, kShiftOpcode, dst, src2, src1);
}
// 32-bit form with |src1| in memory.
void shlxl(Register dst, const Operand& src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2l(k66, kShiftOpcode, dst, src2, src1);
}
// shrx: forwards to the BMI2 emitter with the kF2 prefix and opcode 0xF7.
// Note the swapped order: |src2| is passed as the vreg operand and |src1| as
// the rm operand.
void shrxq(Register dst, Register src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2q(kF2, kShiftOpcode, dst, src2, src1);
}
// 64-bit form with |src1| in memory.
void shrxq(Register dst, const Operand& src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2q(kF2, kShiftOpcode, dst, src2, src1);
}
// 32-bit form, register operands.
void shrxl(Register dst, Register src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2l(kF2, kShiftOpcode, dst, src2, src1);
}
// 32-bit form with |src1| in memory.
void shrxl(Register dst, const Operand& src1, Register src2) {
  const byte kShiftOpcode = 0xF7;
  bmi2l(kF2, kShiftOpcode, dst, src2, src1);
}
// rorx with an immediate byte |imm8|, defined out of line. The q/l suffix
// selects the 64-/32-bit form; |src| may be a register or a memory operand.
// The definitions assert that BMI2 is enabled.
void rorxq(Register dst, Register src, byte imm8);
void rorxq(Register dst, const Operand& src, byte imm8);
void rorxl(Register dst, Register src, byte imm8);
void rorxl(Register dst, const Operand& src, byte imm8);
// Debugging
void Print();
......@@ -1602,9 +1779,9 @@ class Assembler : public AssemblerBase {
// Emit vex prefix
// Selector values passed to emit_vex_prefix() as its pp / l / w / m arguments.
enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
// NOTE(review): VectorLength is defined twice; the second definition adds
// kLZ (an alias of kL128, used by the BMI emitters). The first looks like the
// pre-change form -- confirm that only one definition should remain.
enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128 };
enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
// NOTE(review): LeadingOpcode is defined twice. In the first definition
// k0F3A has the same value as k0F38 (0x2); the second gives it its own value
// (0x3). The first looks like the pre-change form -- confirm that only the
// second should remain.
enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x2 };
enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
void emit_vex2_byte0() { emit(0xc5); }
inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
......@@ -1618,9 +1795,15 @@ class Assembler : public AssemblerBase {
inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
VectorLength l, SIMDPrefix pp, LeadingOpcode m,
VexW w);
inline void emit_vex_prefix(Register reg, Register v, Register rm,
VectorLength l, SIMDPrefix pp, LeadingOpcode m,
VexW w);
inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, const Operand& rm,
VectorLength l, SIMDPrefix pp, LeadingOpcode m,
VexW w);
inline void emit_vex_prefix(Register reg, Register v, const Operand& rm,
VectorLength l, SIMDPrefix pp, LeadingOpcode m,
VexW w);
// Emit the ModR/M byte, and optionally the SIB byte and
// 1- or 4-byte offset for a memory operand. Also encodes
......@@ -1895,6 +2078,18 @@ class Assembler : public AssemblerBase {
arithmetic_op(0x31, src, dst, size);
}
// Most BMI instructions are similar.
void bmi1q(byte op, Register reg, Register vreg, Register rm);
void bmi1q(byte op, Register reg, Register vreg, const Operand& rm);
void bmi1l(byte op, Register reg, Register vreg, Register rm);
void bmi1l(byte op, Register reg, Register vreg, const Operand& rm);
void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg,
const Operand& rm);
void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
const Operand& rm);
friend class CodePatcher;
friend class EnsureSpace;
friend class RegExpMacroAssemblerX64;
......
......@@ -359,6 +359,12 @@ class DisassemblerX64 {
return (checked & 4) != 1;
}
bool vex_none() {
DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
return (checked & 3) == 0;
}
bool vex_66() {
DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
......@@ -940,6 +946,12 @@ int DisassemblerX64::AVXInstruction(byte* data) {
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0xf7:
AppendToBuffer("shlx%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightOperand(current);
AppendToBuffer(",%s", NameOfCPURegister(vvvv));
break;
default:
UnimplementedInstruction();
}
......@@ -1017,6 +1029,114 @@ int DisassemblerX64::AVXInstruction(byte* data) {
default:
UnimplementedInstruction();
}
} else if (vex_none() && vex_0f38()) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
const char* mnem = "?";
switch (opcode) {
case 0xf2:
AppendToBuffer("andn%c %s,%s,", operand_size_code(),
NameOfCPURegister(regop), NameOfCPURegister(vvvv));
current += PrintRightOperand(current);
break;
case 0xf5:
AppendToBuffer("bzhi%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightOperand(current);
AppendToBuffer(",%s", NameOfCPURegister(vvvv));
break;
case 0xf7:
AppendToBuffer("bextr%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightOperand(current);
AppendToBuffer(",%s", NameOfCPURegister(vvvv));
break;
case 0xf3:
switch (regop) {
case 1:
mnem = "blsr";
break;
case 2:
mnem = "blsmsk";
break;
case 3:
mnem = "blsi";
break;
default:
UnimplementedInstruction();
}
AppendToBuffer("%s%c %s,", mnem, operand_size_code(),
NameOfCPURegister(vvvv));
current += PrintRightOperand(current);
mnem = "?";
break;
default:
UnimplementedInstruction();
}
} else if (vex_f2() && vex_0f38()) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0xf5:
AppendToBuffer("pdep%c %s,%s,", operand_size_code(),
NameOfCPURegister(regop), NameOfCPURegister(vvvv));
current += PrintRightOperand(current);
break;
case 0xf6:
AppendToBuffer("mulx%c %s,%s,", operand_size_code(),
NameOfCPURegister(regop), NameOfCPURegister(vvvv));
current += PrintRightOperand(current);
break;
case 0xf7:
AppendToBuffer("shrx%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightOperand(current);
AppendToBuffer(",%s", NameOfCPURegister(vvvv));
break;
default:
UnimplementedInstruction();
}
} else if (vex_f3() && vex_0f38()) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0xf5:
AppendToBuffer("pext%c %s,%s,", operand_size_code(),
NameOfCPURegister(regop), NameOfCPURegister(vvvv));
current += PrintRightOperand(current);
break;
case 0xf7:
AppendToBuffer("sarx%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightOperand(current);
AppendToBuffer(",%s", NameOfCPURegister(vvvv));
break;
default:
UnimplementedInstruction();
}
} else if (vex_f2() && vex_0f3a()) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0xf0:
AppendToBuffer("rorx%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightOperand(current);
switch (operand_size()) {
case OPERAND_DOUBLEWORD_SIZE:
AppendToBuffer(",%d", *current & 0x1f);
break;
case OPERAND_QUADWORD_SIZE:
AppendToBuffer(",%d", *current & 0x3f);
break;
default:
UnimplementedInstruction();
}
current += 1;
break;
default:
UnimplementedInstruction();
}
} else {
UnimplementedInstruction();
}
......@@ -1431,6 +1551,24 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("%s %s,", mnemonic, NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0xB8) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("popcnt%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightOperand(current);
} else if (opcode == 0xBC) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("tzcnt%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightOperand(current);
} else if (opcode == 0xBD) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("lzcnt%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightOperand(current);
} else if (opcode == 0xC2) {
// Intel manual 2A, Table 3-18.
int mod, regop, rm;
......@@ -1643,12 +1781,14 @@ int DisassemblerX64::InstructionDecode(v8::internal::Vector<char> out_buffer,
vex_byte1_ = *(data + 1);
vex_byte2_ = *(data + 2);
setRex(0x40 | (~(vex_byte1_ >> 5) & 7) | ((vex_byte2_ >> 4) & 8));
data += 2;
data += 3;
break; // Vex is the last prefix.
} else if (current == VEX2_PREFIX) {
vex_byte0_ = current;
vex_byte1_ = *(data + 1);
setRex(0x40 | (~(vex_byte1_ >> 5) & 4));
data++;
data += 2;
break; // Vex is the last prefix.
} else { // Not a prefix - an opcode.
break;
}
......
......@@ -2886,7 +2886,11 @@ void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) {
void MacroAssembler::Lzcntl(Register dst, Register src) {
// Use the LZCNT instruction when it is supported; otherwise fall back to the
// BSR-based sequence below.
if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(this, LZCNT);
lzcntl(dst, src);
return;
}
Label not_zero_src;
bsrl(dst, src);
j(not_zero, &not_zero_src, Label::kNear);
......@@ -2897,7 +2901,11 @@ void MacroAssembler::Lzcntl(Register dst, Register src) {
void MacroAssembler::Lzcntl(Register dst, const Operand& src) {
// Use the LZCNT instruction when it is supported; otherwise fall back to the
// BSR-based sequence below.
if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(this, LZCNT);
lzcntl(dst, src);
return;
}
Label not_zero_src;
bsrl(dst, src);
j(not_zero, &not_zero_src, Label::kNear);
......
This diff is collapsed.
......@@ -596,6 +596,99 @@ TEST(DisasmX64) {
__ vfnmsub231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
}
}
// BMI1 instructions
{
if (CpuFeatures::IsSupported(BMI1)) {
CpuFeatureScope scope(&assm, BMI1);
__ andnq(rax, rbx, rcx);
__ andnq(rax, rbx, Operand(rbx, rcx, times_4, 10000));
__ andnl(rax, rbx, rcx);
__ andnl(rax, rbx, Operand(rbx, rcx, times_4, 10000));
__ bextrq(rax, rbx, rcx);
__ bextrq(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ bextrl(rax, rbx, rcx);
__ bextrl(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ blsiq(rax, rbx);
__ blsiq(rax, Operand(rbx, rcx, times_4, 10000));
__ blsil(rax, rbx);
__ blsil(rax, Operand(rbx, rcx, times_4, 10000));
__ blsmskq(rax, rbx);
__ blsmskq(rax, Operand(rbx, rcx, times_4, 10000));
__ blsmskl(rax, rbx);
__ blsmskl(rax, Operand(rbx, rcx, times_4, 10000));
__ blsrq(rax, rbx);
__ blsrq(rax, Operand(rbx, rcx, times_4, 10000));
__ blsrl(rax, rbx);
__ blsrl(rax, Operand(rbx, rcx, times_4, 10000));
__ tzcntq(rax, rbx);
__ tzcntq(rax, Operand(rbx, rcx, times_4, 10000));
__ tzcntl(rax, rbx);
__ tzcntl(rax, Operand(rbx, rcx, times_4, 10000));
}
}
// LZCNT instructions
{
if (CpuFeatures::IsSupported(LZCNT)) {
CpuFeatureScope scope(&assm, LZCNT);
__ lzcntq(rax, rbx);
__ lzcntq(rax, Operand(rbx, rcx, times_4, 10000));
__ lzcntl(rax, rbx);
__ lzcntl(rax, Operand(rbx, rcx, times_4, 10000));
}
}
// POPCNT instructions
{
if (CpuFeatures::IsSupported(POPCNT)) {
CpuFeatureScope scope(&assm, POPCNT);
__ popcntq(rax, rbx);
__ popcntq(rax, Operand(rbx, rcx, times_4, 10000));
__ popcntl(rax, rbx);
__ popcntl(rax, Operand(rbx, rcx, times_4, 10000));
}
}
// BMI2 instructions
{
if (CpuFeatures::IsSupported(BMI2)) {
CpuFeatureScope scope(&assm, BMI2);
__ bzhiq(rax, rbx, rcx);
__ bzhiq(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ bzhil(rax, rbx, rcx);
__ bzhil(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ mulxq(rax, rbx, rcx);
__ mulxq(rax, rbx, Operand(rbx, rcx, times_4, 10000));
__ mulxl(rax, rbx, rcx);
__ mulxl(rax, rbx, Operand(rbx, rcx, times_4, 10000));
__ pdepq(rax, rbx, rcx);
__ pdepq(rax, rbx, Operand(rbx, rcx, times_4, 10000));
__ pdepl(rax, rbx, rcx);
__ pdepl(rax, rbx, Operand(rbx, rcx, times_4, 10000));
__ pextq(rax, rbx, rcx);
__ pextq(rax, rbx, Operand(rbx, rcx, times_4, 10000));
__ pextl(rax, rbx, rcx);
__ pextl(rax, rbx, Operand(rbx, rcx, times_4, 10000));
__ sarxq(rax, rbx, rcx);
__ sarxq(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ sarxl(rax, rbx, rcx);
__ sarxl(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ shlxq(rax, rbx, rcx);
__ shlxq(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ shlxl(rax, rbx, rcx);
__ shlxl(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ shrxq(rax, rbx, rcx);
__ shrxq(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ shrxl(rax, rbx, rcx);
__ shrxl(rax, Operand(rbx, rcx, times_4, 10000), rbx);
__ rorxq(rax, rbx, 63);
__ rorxq(rax, Operand(rbx, rcx, times_4, 10000), 63);
__ rorxl(rax, rbx, 31);
__ rorxl(rax, Operand(rbx, rcx, times_4, 10000), 31);
}
}
// xchg.
{
__ xchgq(rax, rax);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment