Introduce andps for IA32/X64

Replace andpd (X64) and pand (IA32) with andps in Math.abs.

BUG=
R=svenpanne@chromium.org

Review URL: https://codereview.chromium.org/44153002

Patch from Weiliang Lin <weiliang.lin@intel.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@17413 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 3e7a1c04
...@@ -2344,6 +2344,14 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) { ...@@ -2344,6 +2344,14 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
} }
// Emits the register-to-register form of ANDPS: the opcode bytes 0x0F 0x54
// followed by whatever emit_sse_operand(dst, src) produces for the operands.
// Unlike the neighbouring pand emitter, there is no ASSERT(IsEnabled(SSE2))
// here; the header declares andps under "SSE instructions".
void Assembler::andps(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
// Opcode 0F 54; the IA32 disassembler decodes this byte pair as "andps".
EMIT(0x0F);
EMIT(0x54);
// NOTE(review): presumably dst lands in the reg field and src in the r/m
// field — confirm against emit_sse_operand.
emit_sse_operand(dst, src);
}
void Assembler::pand(XMMRegister dst, XMMRegister src) { void Assembler::pand(XMMRegister dst, XMMRegister src) {
ASSERT(IsEnabled(SSE2)); ASSERT(IsEnabled(SSE2));
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
......
...@@ -1017,6 +1017,10 @@ class Assembler : public AssemblerBase { ...@@ -1017,6 +1017,10 @@ class Assembler : public AssemblerBase {
void cpuid(); void cpuid();
// SSE instructions
void andps(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src);
// SSE2 instructions // SSE2 instructions
void cvttss2si(Register dst, const Operand& src); void cvttss2si(Register dst, const Operand& src);
void cvttsd2si(Register dst, const Operand& src); void cvttsd2si(Register dst, const Operand& src);
...@@ -1034,7 +1038,6 @@ class Assembler : public AssemblerBase { ...@@ -1034,7 +1038,6 @@ class Assembler : public AssemblerBase {
void mulsd(XMMRegister dst, const Operand& src); void mulsd(XMMRegister dst, const Operand& src);
void divsd(XMMRegister dst, XMMRegister src); void divsd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src);
void sqrtsd(XMMRegister dst, XMMRegister src); void sqrtsd(XMMRegister dst, XMMRegister src);
void andpd(XMMRegister dst, XMMRegister src); void andpd(XMMRegister dst, XMMRegister src);
......
...@@ -1042,6 +1042,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer, ...@@ -1042,6 +1042,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop), NameOfXMMRegister(regop),
NameOfXMMRegister(rm)); NameOfXMMRegister(rm));
data++; data++;
} else if (f0byte == 0x54) {
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("andps %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (f0byte == 0x57) { } else if (f0byte == 0x57) {
data += 2; data += 2;
int mod, regop, rm; int mod, regop, rm;
......
...@@ -3883,7 +3883,7 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) { ...@@ -3883,7 +3883,7 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) {
XMMRegister input_reg = ToDoubleRegister(instr->value()); XMMRegister input_reg = ToDoubleRegister(instr->value());
__ xorps(scratch, scratch); __ xorps(scratch, scratch);
__ subsd(scratch, input_reg); __ subsd(scratch, input_reg);
__ pand(input_reg, scratch); __ andps(input_reg, scratch);
} else if (r.IsSmiOrInteger32()) { } else if (r.IsSmiOrInteger32()) {
EmitIntegerMathAbs(instr); EmitIntegerMathAbs(instr);
} else { // Tagged case. } else { // Tagged case.
......
...@@ -2476,6 +2476,17 @@ void Assembler::emit_farith(int b1, int b2, int i) { ...@@ -2476,6 +2476,17 @@ void Assembler::emit_farith(int b1, int b2, int i) {
} }
// SSE operations.
// Emits the register-to-register form of ANDPS on x64: an optional REX
// prefix, the opcode bytes 0x0F 0x54, then the operand encoding for
// (dst, src).
void Assembler::andps(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
// NOTE(review): presumably emits a REX prefix only when dst or src needs
// one — confirm against emit_optional_rex_32.
emit_optional_rex_32(dst, src);
// Opcode 0F 54; the x64 disassembler prints opcode 0x54 as "andps".
emit(0x0F);
emit(0x54);
// Per the header comment on emit_sse_operand: the first argument is the reg
// field, the second argument is the r/m field.
emit_sse_operand(dst, src);
}
// SSE 2 operations. // SSE 2 operations.
void Assembler::movd(XMMRegister dst, Register src) { void Assembler::movd(XMMRegister dst, Register src) {
......
...@@ -1346,13 +1346,26 @@ class Assembler : public AssemblerBase { ...@@ -1346,13 +1346,26 @@ class Assembler : public AssemblerBase {
void sahf(); void sahf();
// SSE instructions
void movaps(XMMRegister dst, XMMRegister src);
void movss(XMMRegister dst, const Operand& src);
void movss(const Operand& dst, XMMRegister src);
void cvttss2si(Register dst, const Operand& src);
void cvttss2si(Register dst, XMMRegister src);
void cvtlsi2ss(XMMRegister dst, Register src);
void xorps(XMMRegister dst, XMMRegister src);
void andps(XMMRegister dst, XMMRegister src);
void movmskps(Register dst, XMMRegister src);
// SSE2 instructions // SSE2 instructions
void movd(XMMRegister dst, Register src); void movd(XMMRegister dst, Register src);
void movd(Register dst, XMMRegister src); void movd(Register dst, XMMRegister src);
void movq(XMMRegister dst, Register src); void movq(XMMRegister dst, Register src);
void movq(Register dst, XMMRegister src); void movq(Register dst, XMMRegister src);
void movq(XMMRegister dst, XMMRegister src); void movq(XMMRegister dst, XMMRegister src);
void extractps(Register dst, XMMRegister src, byte imm8);
// Don't use this unless it's important to keep the // Don't use this unless it's important to keep the
// top half of the destination register unchanged. // top half of the destination register unchanged.
...@@ -1370,13 +1383,7 @@ class Assembler : public AssemblerBase { ...@@ -1370,13 +1383,7 @@ class Assembler : public AssemblerBase {
void movdqu(XMMRegister dst, const Operand& src); void movdqu(XMMRegister dst, const Operand& src);
void movapd(XMMRegister dst, XMMRegister src); void movapd(XMMRegister dst, XMMRegister src);
void movaps(XMMRegister dst, XMMRegister src);
void movss(XMMRegister dst, const Operand& src);
void movss(const Operand& dst, XMMRegister src);
void cvttss2si(Register dst, const Operand& src);
void cvttss2si(Register dst, XMMRegister src);
void cvttsd2si(Register dst, const Operand& src); void cvttsd2si(Register dst, const Operand& src);
void cvttsd2si(Register dst, XMMRegister src); void cvttsd2si(Register dst, XMMRegister src);
void cvttsd2siq(Register dst, XMMRegister src); void cvttsd2siq(Register dst, XMMRegister src);
...@@ -1386,7 +1393,6 @@ class Assembler : public AssemblerBase { ...@@ -1386,7 +1393,6 @@ class Assembler : public AssemblerBase {
void cvtqsi2sd(XMMRegister dst, const Operand& src); void cvtqsi2sd(XMMRegister dst, const Operand& src);
void cvtqsi2sd(XMMRegister dst, Register src); void cvtqsi2sd(XMMRegister dst, Register src);
void cvtlsi2ss(XMMRegister dst, Register src);
void cvtss2sd(XMMRegister dst, XMMRegister src); void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtss2sd(XMMRegister dst, const Operand& src); void cvtss2sd(XMMRegister dst, const Operand& src);
...@@ -1405,11 +1411,16 @@ class Assembler : public AssemblerBase { ...@@ -1405,11 +1411,16 @@ class Assembler : public AssemblerBase {
void andpd(XMMRegister dst, XMMRegister src); void andpd(XMMRegister dst, XMMRegister src);
void orpd(XMMRegister dst, XMMRegister src); void orpd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src);
void sqrtsd(XMMRegister dst, XMMRegister src); void sqrtsd(XMMRegister dst, XMMRegister src);
void ucomisd(XMMRegister dst, XMMRegister src); void ucomisd(XMMRegister dst, XMMRegister src);
void ucomisd(XMMRegister dst, const Operand& src); void ucomisd(XMMRegister dst, const Operand& src);
void cmpltsd(XMMRegister dst, XMMRegister src);
void movmskpd(Register dst, XMMRegister src);
// SSE 4.1 instruction
void extractps(Register dst, XMMRegister src, byte imm8);
enum RoundingMode { enum RoundingMode {
kRoundToNearest = 0x0, kRoundToNearest = 0x0,
...@@ -1420,17 +1431,6 @@ class Assembler : public AssemblerBase { ...@@ -1420,17 +1431,6 @@ class Assembler : public AssemblerBase {
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode); void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void movmskpd(Register dst, XMMRegister src);
void movmskps(Register dst, XMMRegister src);
void cmpltsd(XMMRegister dst, XMMRegister src);
// The first argument is the reg field, the second argument is the r/m field.
void emit_sse_operand(XMMRegister dst, XMMRegister src);
void emit_sse_operand(XMMRegister reg, const Operand& adr);
void emit_sse_operand(XMMRegister dst, Register src);
void emit_sse_operand(Register dst, XMMRegister src);
// Debugging // Debugging
void Print(); void Print();
...@@ -1611,6 +1611,12 @@ class Assembler : public AssemblerBase { ...@@ -1611,6 +1611,12 @@ class Assembler : public AssemblerBase {
// Emit the code-object-relative offset of the label's position // Emit the code-object-relative offset of the label's position
inline void emit_code_relative_offset(Label* label); inline void emit_code_relative_offset(Label* label);
// The first argument is the reg field, the second argument is the r/m field.
void emit_sse_operand(XMMRegister dst, XMMRegister src);
void emit_sse_operand(XMMRegister reg, const Operand& adr);
void emit_sse_operand(XMMRegister dst, Register src);
void emit_sse_operand(Register dst, XMMRegister src);
// Emit machine code for one of the operations ADD, ADC, SUB, SBC, // Emit machine code for one of the operations ADD, ADC, SUB, SBC,
// AND, OR, XOR, or CMP. The encodings of these operations are all // AND, OR, XOR, or CMP. The encodings of these operations are all
// similar, differing just in the opcode or in the reg field of the // similar, differing just in the opcode or in the reg field of the
......
...@@ -1260,6 +1260,13 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { ...@@ -1260,6 +1260,13 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
byte_size_operand_ = idesc.byte_size_operation; byte_size_operand_ = idesc.byte_size_operation;
current += PrintOperands(idesc.mnem, idesc.op_order_, current); current += PrintOperands(idesc.mnem, idesc.op_order_, current);
} else if (opcode == 0x54) {
// andps xmm, xmm/m128
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("andps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x57) { } else if (opcode == 0x57) {
// xorps xmm, xmm/m128 // xorps xmm, xmm/m128
int mod, regop, rm; int mod, regop, rm;
......
...@@ -3369,7 +3369,7 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) { ...@@ -3369,7 +3369,7 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) {
XMMRegister input_reg = ToDoubleRegister(instr->value()); XMMRegister input_reg = ToDoubleRegister(instr->value());
__ xorps(scratch, scratch); __ xorps(scratch, scratch);
__ subsd(scratch, input_reg); __ subsd(scratch, input_reg);
__ andpd(input_reg, scratch); __ andps(input_reg, scratch);
} else if (r.IsInteger32()) { } else if (r.IsInteger32()) {
EmitIntegerMathAbs(instr); EmitIntegerMathAbs(instr);
} else if (r.IsSmi()) { } else if (r.IsSmi()) {
......
...@@ -354,19 +354,29 @@ TEST(DisasmIa320) { ...@@ -354,19 +354,29 @@ TEST(DisasmIa320) {
CpuFeatureScope fscope(&assm, SSE2); CpuFeatureScope fscope(&assm, SSE2);
__ cvttss2si(edx, Operand(ebx, ecx, times_4, 10000)); __ cvttss2si(edx, Operand(ebx, ecx, times_4, 10000));
__ cvtsi2sd(xmm1, Operand(ebx, ecx, times_4, 10000)); __ cvtsi2sd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ addsd(xmm1, xmm0);
__ mulsd(xmm1, xmm0);
__ subsd(xmm1, xmm0);
__ divsd(xmm1, xmm0);
__ movsd(xmm1, Operand(ebx, ecx, times_4, 10000)); __ movsd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ movsd(Operand(ebx, ecx, times_4, 10000), xmm1); __ movsd(Operand(ebx, ecx, times_4, 10000), xmm1);
__ ucomisd(xmm0, xmm1); __ movaps(xmm0, xmm1);
// 128 bit move instructions. // 128 bit move instructions.
__ movdqa(xmm0, Operand(ebx, ecx, times_4, 10000)); __ movdqa(xmm0, Operand(ebx, ecx, times_4, 10000));
__ movdqa(Operand(ebx, ecx, times_4, 10000), xmm0); __ movdqa(Operand(ebx, ecx, times_4, 10000), xmm0);
__ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000)); __ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000));
__ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0); __ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0);
__ addsd(xmm1, xmm0);
__ mulsd(xmm1, xmm0);
__ subsd(xmm1, xmm0);
__ divsd(xmm1, xmm0);
__ ucomisd(xmm0, xmm1);
__ cmpltsd(xmm0, xmm1);
__ andps(xmm0, xmm1);
__ andpd(xmm0, xmm1);
__ psllq(xmm0, 17);
__ psllq(xmm0, xmm1);
__ psrlq(xmm0, 17);
__ psrlq(xmm0, xmm1);
__ por(xmm0, xmm1);
} }
} }
...@@ -393,36 +403,6 @@ TEST(DisasmIa320) { ...@@ -393,36 +403,6 @@ TEST(DisasmIa320) {
} }
} }
// andpd, cmpltsd, movaps, psllq, psrlq, por.
{
if (CpuFeatures::IsSupported(SSE2)) {
CpuFeatureScope fscope(&assm, SSE2);
__ andpd(xmm0, xmm1);
__ andpd(xmm1, xmm2);
__ cmpltsd(xmm0, xmm1);
__ cmpltsd(xmm1, xmm2);
__ movaps(xmm0, xmm1);
__ movaps(xmm1, xmm2);
__ psllq(xmm0, 17);
__ psllq(xmm1, 42);
__ psllq(xmm0, xmm1);
__ psllq(xmm1, xmm2);
__ psrlq(xmm0, 17);
__ psrlq(xmm1, 42);
__ psrlq(xmm0, xmm1);
__ psrlq(xmm1, xmm2);
__ por(xmm0, xmm1);
__ por(xmm1, xmm2);
}
}
{ {
if (CpuFeatures::IsSupported(SSE2) && if (CpuFeatures::IsSupported(SSE2) &&
CpuFeatures::IsSupported(SSE4_1)) { CpuFeatures::IsSupported(SSE4_1)) {
......
...@@ -335,32 +335,37 @@ TEST(DisasmX64) { ...@@ -335,32 +335,37 @@ TEST(DisasmX64) {
__ fcompp(); __ fcompp();
__ fwait(); __ fwait();
__ nop(); __ nop();
// SSE instruction
{ {
if (CpuFeatures::IsSupported(SSE2)) {
CpuFeatures::Scope fscope(SSE2);
__ cvttss2si(rdx, Operand(rbx, rcx, times_4, 10000)); __ cvttss2si(rdx, Operand(rbx, rcx, times_4, 10000));
__ cvttss2si(rdx, xmm1); __ cvttss2si(rdx, xmm1);
__ movaps(xmm0, xmm1);
__ andps(xmm0, xmm1);
}
// SSE 2 instructions
{
__ cvttsd2si(rdx, Operand(rbx, rcx, times_4, 10000)); __ cvttsd2si(rdx, Operand(rbx, rcx, times_4, 10000));
__ cvttsd2si(rdx, xmm1); __ cvttsd2si(rdx, xmm1);
__ cvttsd2siq(rdx, xmm1); __ cvttsd2siq(rdx, xmm1);
__ movsd(xmm1, Operand(rbx, rcx, times_4, 10000));
__ movsd(Operand(rbx, rcx, times_4, 10000), xmm1);
// 128 bit move instructions.
__ movdqa(xmm0, Operand(rbx, rcx, times_4, 10000));
__ movdqa(Operand(rbx, rcx, times_4, 10000), xmm0);
__ addsd(xmm1, xmm0); __ addsd(xmm1, xmm0);
__ mulsd(xmm1, xmm0); __ mulsd(xmm1, xmm0);
__ subsd(xmm1, xmm0); __ subsd(xmm1, xmm0);
__ divsd(xmm1, xmm0); __ divsd(xmm1, xmm0);
__ movsd(xmm1, Operand(rbx, rcx, times_4, 10000));
__ movsd(Operand(rbx, rcx, times_4, 10000), xmm1);
__ ucomisd(xmm0, xmm1); __ ucomisd(xmm0, xmm1);
// 128 bit move instructions. __ andpd(xmm0, xmm1);
__ movdqa(xmm0, Operand(rbx, rcx, times_4, 10000));
__ movdqa(Operand(rbx, rcx, times_4, 10000), xmm0);
}
} }
// cmov. // cmov.
{ {
if (CpuFeatures::IsSupported(CMOV)) {
CpuFeatures::Scope use_cmov(CMOV);
__ cmovq(overflow, rax, Operand(rax, 0)); __ cmovq(overflow, rax, Operand(rax, 0));
__ cmovq(no_overflow, rax, Operand(rax, 1)); __ cmovq(no_overflow, rax, Operand(rax, 1));
__ cmovq(below, rax, Operand(rax, 2)); __ cmovq(below, rax, Operand(rax, 2));
...@@ -378,19 +383,6 @@ TEST(DisasmX64) { ...@@ -378,19 +383,6 @@ TEST(DisasmX64) {
__ cmovq(less_equal, rax, Operand(rdx, 2)); __ cmovq(less_equal, rax, Operand(rdx, 2));
__ cmovq(greater, rax, Operand(rdx, 3)); __ cmovq(greater, rax, Operand(rdx, 3));
} }
}
// andpd, etc.
{
if (CpuFeatures::IsSupported(SSE2)) {
CpuFeatures::Scope fscope(SSE2);
__ andpd(xmm0, xmm1);
__ andpd(xmm1, xmm2);
__ movaps(xmm0, xmm1);
__ movaps(xmm1, xmm2);
}
}
{ {
if (CpuFeatures::IsSupported(SSE4_1)) { if (CpuFeatures::IsSupported(SSE4_1)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment