Introduce andps for IA32/X64

Replace andpd and pand with andps in Math.abs.

BUG=
R=svenpanne@chromium.org

Review URL: https://codereview.chromium.org/44153002

Patch from Weiliang Lin <weiliang.lin@intel.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@17413 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 3e7a1c04
......@@ -2344,6 +2344,14 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
}
// Emits ANDPS (bitwise AND of packed single-precision floats): 0F 54 /r.
// Used e.g. to clear the sign bit for Math.abs without requiring SSE2.
// No prefix byte is emitted, distinguishing this from the SSE2 ANDPD
// (66 0F 54) and PAND (66 0F DB) encodings.
void Assembler::andps(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  EMIT(0x0F);   // two-byte opcode escape
  EMIT(0x54);   // ANDPS opcode
  emit_sse_operand(dst, src);  // ModR/M: reg = dst, r/m = src
}
void Assembler::pand(XMMRegister dst, XMMRegister src) {
ASSERT(IsEnabled(SSE2));
EnsureSpace ensure_space(this);
......
......@@ -1017,6 +1017,10 @@ class Assembler : public AssemblerBase {
void cpuid();
// SSE instructions
void andps(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src);
// SSE2 instructions
void cvttss2si(Register dst, const Operand& src);
void cvttsd2si(Register dst, const Operand& src);
......@@ -1034,7 +1038,6 @@ class Assembler : public AssemblerBase {
void mulsd(XMMRegister dst, const Operand& src);
void divsd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src);
void sqrtsd(XMMRegister dst, XMMRegister src);
void andpd(XMMRegister dst, XMMRegister src);
......@@ -1157,7 +1160,7 @@ class Assembler : public AssemblerBase {
// Avoid overflows for displacements etc.
static const int kMaximalBufferSize = 512*MB;
byte byte_at(int pos) { return buffer_[pos]; }
byte byte_at(int pos) { return buffer_[pos]; }
void set_byte_at(int pos, byte value) { buffer_[pos] = value; }
protected:
......
......@@ -1042,6 +1042,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (f0byte == 0x54) {
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("andps %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (f0byte == 0x57) {
data += 2;
int mod, regop, rm;
......
......@@ -3883,7 +3883,7 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) {
XMMRegister input_reg = ToDoubleRegister(instr->value());
__ xorps(scratch, scratch);
__ subsd(scratch, input_reg);
__ pand(input_reg, scratch);
__ andps(input_reg, scratch);
} else if (r.IsSmiOrInteger32()) {
EmitIntegerMathAbs(instr);
} else { // Tagged case.
......
......@@ -2476,6 +2476,17 @@ void Assembler::emit_farith(int b1, int b2, int i) {
}
// SSE operations.
// Emits ANDPS (bitwise AND of packed single-precision floats): 0F 54 /r.
// x64 variant: an optional REX prefix is emitted first when either
// register is xmm8..xmm15. Like the ia32 twin, no 66 prefix is emitted,
// so this encodes ANDPS rather than ANDPD/PAND.
void Assembler::andps(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  emit_optional_rex_32(dst, src);  // REX only if dst/src need extension bits
  emit(0x0F);   // two-byte opcode escape
  emit(0x54);   // ANDPS opcode
  emit_sse_operand(dst, src);  // ModR/M: reg = dst, r/m = src
}
// SSE 2 operations.
void Assembler::movd(XMMRegister dst, Register src) {
......
......@@ -1346,13 +1346,26 @@ class Assembler : public AssemblerBase {
void sahf();
// SSE instructions
void movaps(XMMRegister dst, XMMRegister src);
void movss(XMMRegister dst, const Operand& src);
void movss(const Operand& dst, XMMRegister src);
void cvttss2si(Register dst, const Operand& src);
void cvttss2si(Register dst, XMMRegister src);
void cvtlsi2ss(XMMRegister dst, Register src);
void xorps(XMMRegister dst, XMMRegister src);
void andps(XMMRegister dst, XMMRegister src);
void movmskps(Register dst, XMMRegister src);
// SSE2 instructions
void movd(XMMRegister dst, Register src);
void movd(Register dst, XMMRegister src);
void movq(XMMRegister dst, Register src);
void movq(Register dst, XMMRegister src);
void movq(XMMRegister dst, XMMRegister src);
void extractps(Register dst, XMMRegister src, byte imm8);
// Don't use this unless it's important to keep the
// top half of the destination register unchanged.
......@@ -1370,13 +1383,7 @@ class Assembler : public AssemblerBase {
void movdqu(XMMRegister dst, const Operand& src);
void movapd(XMMRegister dst, XMMRegister src);
void movaps(XMMRegister dst, XMMRegister src);
void movss(XMMRegister dst, const Operand& src);
void movss(const Operand& dst, XMMRegister src);
void cvttss2si(Register dst, const Operand& src);
void cvttss2si(Register dst, XMMRegister src);
void cvttsd2si(Register dst, const Operand& src);
void cvttsd2si(Register dst, XMMRegister src);
void cvttsd2siq(Register dst, XMMRegister src);
......@@ -1386,7 +1393,6 @@ class Assembler : public AssemblerBase {
void cvtqsi2sd(XMMRegister dst, const Operand& src);
void cvtqsi2sd(XMMRegister dst, Register src);
void cvtlsi2ss(XMMRegister dst, Register src);
void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtss2sd(XMMRegister dst, const Operand& src);
......@@ -1405,11 +1411,16 @@ class Assembler : public AssemblerBase {
void andpd(XMMRegister dst, XMMRegister src);
void orpd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src);
void sqrtsd(XMMRegister dst, XMMRegister src);
void ucomisd(XMMRegister dst, XMMRegister src);
void ucomisd(XMMRegister dst, const Operand& src);
void cmpltsd(XMMRegister dst, XMMRegister src);
void movmskpd(Register dst, XMMRegister src);
// SSE 4.1 instruction
void extractps(Register dst, XMMRegister src, byte imm8);
enum RoundingMode {
kRoundToNearest = 0x0,
......@@ -1420,17 +1431,6 @@ class Assembler : public AssemblerBase {
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void movmskpd(Register dst, XMMRegister src);
void movmskps(Register dst, XMMRegister src);
void cmpltsd(XMMRegister dst, XMMRegister src);
// The first argument is the reg field, the second argument is the r/m field.
void emit_sse_operand(XMMRegister dst, XMMRegister src);
void emit_sse_operand(XMMRegister reg, const Operand& adr);
void emit_sse_operand(XMMRegister dst, Register src);
void emit_sse_operand(Register dst, XMMRegister src);
// Debugging
void Print();
......@@ -1611,6 +1611,12 @@ class Assembler : public AssemblerBase {
// Emit the code-object-relative offset of the label's position
inline void emit_code_relative_offset(Label* label);
// The first argument is the reg field, the second argument is the r/m field.
void emit_sse_operand(XMMRegister dst, XMMRegister src);
void emit_sse_operand(XMMRegister reg, const Operand& adr);
void emit_sse_operand(XMMRegister dst, Register src);
void emit_sse_operand(Register dst, XMMRegister src);
// Emit machine code for one of the operations ADD, ADC, SUB, SBC,
// AND, OR, XOR, or CMP. The encodings of these operations are all
// similar, differing just in the opcode or in the reg field of the
......
......@@ -1260,6 +1260,13 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
byte_size_operand_ = idesc.byte_size_operation;
current += PrintOperands(idesc.mnem, idesc.op_order_, current);
} else if (opcode == 0x54) {
    // andps xmm, xmm/m128
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("andps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x57) {
// xorps xmm, xmm/m128
int mod, regop, rm;
......
......@@ -3369,7 +3369,7 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) {
XMMRegister input_reg = ToDoubleRegister(instr->value());
__ xorps(scratch, scratch);
__ subsd(scratch, input_reg);
__ andpd(input_reg, scratch);
__ andps(input_reg, scratch);
} else if (r.IsInteger32()) {
EmitIntegerMathAbs(instr);
} else if (r.IsSmi()) {
......
......@@ -354,19 +354,29 @@ TEST(DisasmIa320) {
CpuFeatureScope fscope(&assm, SSE2);
__ cvttss2si(edx, Operand(ebx, ecx, times_4, 10000));
__ cvtsi2sd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ addsd(xmm1, xmm0);
__ mulsd(xmm1, xmm0);
__ subsd(xmm1, xmm0);
__ divsd(xmm1, xmm0);
__ movsd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ movsd(Operand(ebx, ecx, times_4, 10000), xmm1);
__ ucomisd(xmm0, xmm1);
__ movaps(xmm0, xmm1);
// 128 bit move instructions.
__ movdqa(xmm0, Operand(ebx, ecx, times_4, 10000));
__ movdqa(Operand(ebx, ecx, times_4, 10000), xmm0);
__ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000));
__ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0);
__ addsd(xmm1, xmm0);
__ mulsd(xmm1, xmm0);
__ subsd(xmm1, xmm0);
__ divsd(xmm1, xmm0);
__ ucomisd(xmm0, xmm1);
__ cmpltsd(xmm0, xmm1);
__ andps(xmm0, xmm1);
__ andpd(xmm0, xmm1);
__ psllq(xmm0, 17);
__ psllq(xmm0, xmm1);
__ psrlq(xmm0, 17);
__ psrlq(xmm0, xmm1);
__ por(xmm0, xmm1);
}
}
......@@ -393,36 +403,6 @@ TEST(DisasmIa320) {
}
}
// andpd, cmpltsd, movaps, psllq, psrlq, por.
{
if (CpuFeatures::IsSupported(SSE2)) {
CpuFeatureScope fscope(&assm, SSE2);
__ andpd(xmm0, xmm1);
__ andpd(xmm1, xmm2);
__ cmpltsd(xmm0, xmm1);
__ cmpltsd(xmm1, xmm2);
__ movaps(xmm0, xmm1);
__ movaps(xmm1, xmm2);
__ psllq(xmm0, 17);
__ psllq(xmm1, 42);
__ psllq(xmm0, xmm1);
__ psllq(xmm1, xmm2);
__ psrlq(xmm0, 17);
__ psrlq(xmm1, 42);
__ psrlq(xmm0, xmm1);
__ psrlq(xmm1, xmm2);
__ por(xmm0, xmm1);
__ por(xmm1, xmm2);
}
}
{
if (CpuFeatures::IsSupported(SSE2) &&
CpuFeatures::IsSupported(SSE4_1)) {
......
......@@ -335,61 +335,53 @@ TEST(DisasmX64) {
__ fcompp();
__ fwait();
__ nop();
// SSE instruction
{
if (CpuFeatures::IsSupported(SSE2)) {
CpuFeatures::Scope fscope(SSE2);
__ cvttss2si(rdx, Operand(rbx, rcx, times_4, 10000));
__ cvttss2si(rdx, xmm1);
__ cvttsd2si(rdx, Operand(rbx, rcx, times_4, 10000));
__ cvttsd2si(rdx, xmm1);
__ cvttsd2siq(rdx, xmm1);
__ addsd(xmm1, xmm0);
__ mulsd(xmm1, xmm0);
__ subsd(xmm1, xmm0);
__ divsd(xmm1, xmm0);
__ movsd(xmm1, Operand(rbx, rcx, times_4, 10000));
__ movsd(Operand(rbx, rcx, times_4, 10000), xmm1);
__ ucomisd(xmm0, xmm1);
// 128 bit move instructions.
__ movdqa(xmm0, Operand(rbx, rcx, times_4, 10000));
__ movdqa(Operand(rbx, rcx, times_4, 10000), xmm0);
}
}
__ cvttss2si(rdx, Operand(rbx, rcx, times_4, 10000));
__ cvttss2si(rdx, xmm1);
__ movaps(xmm0, xmm1);
// cmov.
__ andps(xmm0, xmm1);
}
// SSE 2 instructions
{
if (CpuFeatures::IsSupported(CMOV)) {
CpuFeatures::Scope use_cmov(CMOV);
__ cmovq(overflow, rax, Operand(rax, 0));
__ cmovq(no_overflow, rax, Operand(rax, 1));
__ cmovq(below, rax, Operand(rax, 2));
__ cmovq(above_equal, rax, Operand(rax, 3));
__ cmovq(equal, rax, Operand(rbx, 0));
__ cmovq(not_equal, rax, Operand(rbx, 1));
__ cmovq(below_equal, rax, Operand(rbx, 2));
__ cmovq(above, rax, Operand(rbx, 3));
__ cmovq(sign, rax, Operand(rcx, 0));
__ cmovq(not_sign, rax, Operand(rcx, 1));
__ cmovq(parity_even, rax, Operand(rcx, 2));
__ cmovq(parity_odd, rax, Operand(rcx, 3));
__ cmovq(less, rax, Operand(rdx, 0));
__ cmovq(greater_equal, rax, Operand(rdx, 1));
__ cmovq(less_equal, rax, Operand(rdx, 2));
__ cmovq(greater, rax, Operand(rdx, 3));
}
__ cvttsd2si(rdx, Operand(rbx, rcx, times_4, 10000));
__ cvttsd2si(rdx, xmm1);
__ cvttsd2siq(rdx, xmm1);
__ movsd(xmm1, Operand(rbx, rcx, times_4, 10000));
__ movsd(Operand(rbx, rcx, times_4, 10000), xmm1);
// 128 bit move instructions.
__ movdqa(xmm0, Operand(rbx, rcx, times_4, 10000));
__ movdqa(Operand(rbx, rcx, times_4, 10000), xmm0);
__ addsd(xmm1, xmm0);
__ mulsd(xmm1, xmm0);
__ subsd(xmm1, xmm0);
__ divsd(xmm1, xmm0);
__ ucomisd(xmm0, xmm1);
__ andpd(xmm0, xmm1);
}
// andpd, etc.
// cmov.
{
if (CpuFeatures::IsSupported(SSE2)) {
CpuFeatures::Scope fscope(SSE2);
__ andpd(xmm0, xmm1);
__ andpd(xmm1, xmm2);
__ movaps(xmm0, xmm1);
__ movaps(xmm1, xmm2);
}
__ cmovq(overflow, rax, Operand(rax, 0));
__ cmovq(no_overflow, rax, Operand(rax, 1));
__ cmovq(below, rax, Operand(rax, 2));
__ cmovq(above_equal, rax, Operand(rax, 3));
__ cmovq(equal, rax, Operand(rbx, 0));
__ cmovq(not_equal, rax, Operand(rbx, 1));
__ cmovq(below_equal, rax, Operand(rbx, 2));
__ cmovq(above, rax, Operand(rbx, 3));
__ cmovq(sign, rax, Operand(rcx, 0));
__ cmovq(not_sign, rax, Operand(rcx, 1));
__ cmovq(parity_even, rax, Operand(rcx, 2));
__ cmovq(parity_odd, rax, Operand(rcx, 3));
__ cmovq(less, rax, Operand(rdx, 0));
__ cmovq(greater_equal, rax, Operand(rdx, 1));
__ cmovq(less_equal, rax, Operand(rdx, 2));
__ cmovq(greater, rax, Operand(rdx, 3));
}
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment