Commit 2ad4373b authored by Clemens Hammacher, committed by Commit Bot

[x64] Improve loading float constants

Avoid loading to a general purpose register if the bit pattern consists
of a consecutive block of 1 bits.

Drive-by: Change a parameter from int8_t to byte such that the AVX_OP
macro works on these methods.

R=mstarzinger@chromium.org

Change-Id: Ib469ddd29d92ddeabe98460d2951b01159a6548a
Reviewed-on: https://chromium-review.googlesource.com/969123
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#52077}
parent 61e46799
...@@ -1629,85 +1629,85 @@ class Assembler : public AssemblerBase { ...@@ -1629,85 +1629,85 @@ class Assembler : public AssemblerBase {
void vlddqu(XMMRegister dst, Operand src) { void vlddqu(XMMRegister dst, Operand src) {
vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG); vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
} }
void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) { void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x71, xmm6, dst, src, k66, k0F, kWIG); vinstr(0x71, xmm6, dst, src, k66, k0F, kWIG);
emit(imm8); emit(imm8);
} }
void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) { void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x71, xmm2, dst, src, k66, k0F, kWIG); vinstr(0x71, xmm2, dst, src, k66, k0F, kWIG);
emit(imm8); emit(imm8);
} }
void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) { void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x71, xmm4, dst, src, k66, k0F, kWIG); vinstr(0x71, xmm4, dst, src, k66, k0F, kWIG);
emit(imm8); emit(imm8);
} }
void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) { void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x72, xmm6, dst, src, k66, k0F, kWIG); vinstr(0x72, xmm6, dst, src, k66, k0F, kWIG);
emit(imm8); emit(imm8);
} }
void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) { void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x72, xmm2, dst, src, k66, k0F, kWIG); vinstr(0x72, xmm2, dst, src, k66, k0F, kWIG);
emit(imm8); emit(imm8);
} }
void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) { void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x72, xmm4, dst, src, k66, k0F, kWIG); vinstr(0x72, xmm4, dst, src, k66, k0F, kWIG);
emit(imm8); emit(imm8);
} }
void vpextrb(Register dst, XMMRegister src, int8_t imm8) { void vpextrb(Register dst, XMMRegister src, uint8_t imm8) {
XMMRegister idst = XMMRegister::from_code(dst.code()); XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0); vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
emit(imm8); emit(imm8);
} }
void vpextrb(Operand dst, XMMRegister src, int8_t imm8) { void vpextrb(Operand dst, XMMRegister src, uint8_t imm8) {
vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0); vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
emit(imm8); emit(imm8);
} }
void vpextrw(Register dst, XMMRegister src, int8_t imm8) { void vpextrw(Register dst, XMMRegister src, uint8_t imm8) {
XMMRegister idst = XMMRegister::from_code(dst.code()); XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0); vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
emit(imm8); emit(imm8);
} }
void vpextrw(Operand dst, XMMRegister src, int8_t imm8) { void vpextrw(Operand dst, XMMRegister src, uint8_t imm8) {
vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0); vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
emit(imm8); emit(imm8);
} }
void vpextrd(Register dst, XMMRegister src, int8_t imm8) { void vpextrd(Register dst, XMMRegister src, uint8_t imm8) {
XMMRegister idst = XMMRegister::from_code(dst.code()); XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0); vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
emit(imm8); emit(imm8);
} }
void vpextrd(Operand dst, XMMRegister src, int8_t imm8) { void vpextrd(Operand dst, XMMRegister src, uint8_t imm8) {
vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0); vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
emit(imm8); emit(imm8);
} }
void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) { void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
XMMRegister isrc = XMMRegister::from_code(src2.code()); XMMRegister isrc = XMMRegister::from_code(src2.code());
vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0); vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
emit(imm8); emit(imm8);
} }
void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) { void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0); vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
emit(imm8); emit(imm8);
} }
void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) { void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
XMMRegister isrc = XMMRegister::from_code(src2.code()); XMMRegister isrc = XMMRegister::from_code(src2.code());
vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0); vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
emit(imm8); emit(imm8);
} }
void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) { void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
vinstr(0xc4, dst, src1, src2, k66, k0F, kW0); vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
emit(imm8); emit(imm8);
} }
void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) { void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
XMMRegister isrc = XMMRegister::from_code(src2.code()); XMMRegister isrc = XMMRegister::from_code(src2.code());
vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0); vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
emit(imm8); emit(imm8);
} }
void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) { void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0); vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
emit(imm8); emit(imm8);
} }
void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) { void vpshufd(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG); vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
emit(imm8); emit(imm8);
} }
......
...@@ -1144,15 +1144,19 @@ void TurboAssembler::MoveNumber(Register dst, double value) { ...@@ -1144,15 +1144,19 @@ void TurboAssembler::MoveNumber(Register dst, double value) {
void TurboAssembler::Move(XMMRegister dst, uint32_t src) { void TurboAssembler::Move(XMMRegister dst, uint32_t src) {
if (src == 0) { if (src == 0) {
Xorpd(dst, dst); Xorps(dst, dst);
} else { } else {
unsigned nlz = base::bits::CountLeadingZeros(src);
unsigned ntz = base::bits::CountTrailingZeros(src);
unsigned pop = base::bits::CountPopulation(src); unsigned pop = base::bits::CountPopulation(src);
DCHECK_NE(0u, pop); DCHECK_NE(0u, pop);
if (pop == 32) { if (pop + ntz + nlz == 32) {
Pcmpeqd(dst, dst); Pcmpeqd(dst, dst);
if (ntz) Pslld(dst, static_cast<byte>(ntz + nlz));
if (nlz) Psrld(dst, static_cast<byte>(nlz));
} else { } else {
movl(kScratchRegister, Immediate(src)); movl(kScratchRegister, Immediate(src));
Movq(dst, kScratchRegister); Movd(dst, kScratchRegister);
} }
} }
} }
...@@ -1165,14 +1169,10 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) { ...@@ -1165,14 +1169,10 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) {
unsigned ntz = base::bits::CountTrailingZeros(src); unsigned ntz = base::bits::CountTrailingZeros(src);
unsigned pop = base::bits::CountPopulation(src); unsigned pop = base::bits::CountPopulation(src);
DCHECK_NE(0u, pop); DCHECK_NE(0u, pop);
if (pop == 64) { if (pop + ntz + nlz == 64) {
Pcmpeqd(dst, dst);
} else if (pop + ntz == 64) {
Pcmpeqd(dst, dst);
Psllq(dst, static_cast<byte>(ntz));
} else if (pop + nlz == 64) {
Pcmpeqd(dst, dst); Pcmpeqd(dst, dst);
Psrlq(dst, static_cast<byte>(nlz)); if (ntz) Psllq(dst, static_cast<byte>(ntz + nlz));
if (nlz) Psrlq(dst, static_cast<byte>(nlz));
} else { } else {
uint32_t lower = static_cast<uint32_t>(src); uint32_t lower = static_cast<uint32_t>(src);
uint32_t upper = static_cast<uint32_t>(src >> 32); uint32_t upper = static_cast<uint32_t>(src >> 32);
......
...@@ -187,7 +187,9 @@ class TurboAssembler : public Assembler { ...@@ -187,7 +187,9 @@ class TurboAssembler : public Assembler {
AVX_OP(Movss, movss) AVX_OP(Movss, movss)
AVX_OP(Movsd, movsd) AVX_OP(Movsd, movsd)
AVX_OP(Pcmpeqd, pcmpeqd) AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP(Pslld, pslld)
AVX_OP(Psllq, psllq) AVX_OP(Psllq, psllq)
AVX_OP(Psrld, psrld)
AVX_OP(Psrlq, psrlq) AVX_OP(Psrlq, psrlq)
AVX_OP(Addsd, addsd) AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd) AVX_OP(Mulsd, mulsd)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment