Commit 404fbb5b authored by vitalyr@chromium.org

SSE2 truncating double-to-i.

Review URL: http://codereview.chromium.org/6049008

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6269 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent c4550bc6
......@@ -66,6 +66,7 @@ namespace internal {
// Out-of-class definitions of the DoubleConstant static members. Their
// addresses are exposed through the matching
// ExternalReference::address_of_*() accessors.
const double DoubleConstant::min_int = kMinInt;
const double DoubleConstant::one_half = 0.5;
// Negative zero. The truncating double-to-int sequence loads it as a mask
// with only the most significant bit set.
const double DoubleConstant::minus_zero = -0.0;
const double DoubleConstant::negative_infinity = -V8_INFINITY;
......@@ -729,6 +730,12 @@ ExternalReference ExternalReference::address_of_one_half() {
}
// Returns an external reference wrapping the address of
// DoubleConstant::minus_zero.
ExternalReference ExternalReference::address_of_minus_zero() {
// The constant is declared const; strip the qualifier so its address can be
// handed over as a plain void*.
double* minus_zero_address = const_cast<double*>(&DoubleConstant::minus_zero);
return ExternalReference(reinterpret_cast<void*>(minus_zero_address));
}
ExternalReference ExternalReference::address_of_negative_infinity() {
return ExternalReference(reinterpret_cast<void*>(
const_cast<double*>(&DoubleConstant::negative_infinity)));
......
......@@ -50,6 +50,7 @@ class DoubleConstant: public AllStatic {
public:
static const double min_int;
static const double one_half;
static const double minus_zero;
static const double negative_infinity;
};
......@@ -555,6 +556,7 @@ class ExternalReference BASE_EMBEDDED {
// Static accessors returning external references to the addresses of common
// double constants.
static ExternalReference address_of_min_int();
static ExternalReference address_of_one_half();
static ExternalReference address_of_minus_zero();
static ExternalReference address_of_negative_infinity();
// Returns the stored address_ reinterpreted as an Address.
Address address() const {return reinterpret_cast<Address>(address_);}
......
......@@ -2465,6 +2465,17 @@ void Assembler::pxor(XMMRegister dst, XMMRegister src) {
}
// Emits the XMM register-to-register form of por: the bytes 0x66 0x0F 0xEB
// followed by the operand encoding of (dst, src). Asserts that SSE2 is
// enabled.
void Assembler::por(XMMRegister dst, XMMRegister src) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
// Remember the current pc_ (taken before any byte of this instruction is
// emitted).
last_pc_ = pc_;
EMIT(0x66);
EMIT(0x0F);
EMIT(0xEB);
emit_sse_operand(dst, src);
}
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
ASSERT(CpuFeatures::IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
......@@ -2489,6 +2500,40 @@ void Assembler::psllq(XMMRegister reg, int8_t shift) {
}
// Emits the form of psllq whose shift count comes from an XMM register: the
// bytes 0x66 0x0F 0xF3 followed by the operand encoding of (dst, src).
// Asserts that SSE2 is enabled.
void Assembler::psllq(XMMRegister dst, XMMRegister src) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
// Remember the current pc_ (taken before any byte of this instruction is
// emitted).
last_pc_ = pc_;
EMIT(0x66);
EMIT(0x0F);
EMIT(0xF3);
emit_sse_operand(dst, src);
}
// Emits the immediate-count form of psrlq: the bytes 0x66 0x0F 0x73, an
// operand byte built from (edx, reg), then the shift count as a trailing
// immediate byte. Asserts that SSE2 is enabled.
void Assembler::psrlq(XMMRegister reg, int8_t shift) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
// Remember the current pc_ (taken before any byte of this instruction is
// emitted).
last_pc_ = pc_;
EMIT(0x66);
EMIT(0x0F);
EMIT(0x73);
// edx is not a real operand here: its code (2) occupies the register slot of
// the operand byte. The disassembler uses that slot to tell psrlq (edx) from
// psllq (esi) under the shared 0x73 opcode.
emit_sse_operand(edx, reg);  // edx == 2
EMIT(shift);
}
// Emits the form of psrlq whose shift count comes from an XMM register: the
// bytes 0x66 0x0F 0xD3 followed by the operand encoding of (dst, src).
// Asserts that SSE2 is enabled.
void Assembler::psrlq(XMMRegister dst, XMMRegister src) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
// Remember the current pc_ (taken before any byte of this instruction is
// emitted).
last_pc_ = pc_;
EMIT(0x66);
EMIT(0x0F);
EMIT(0xD3);
emit_sse_operand(dst, src);
}
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
......
......@@ -919,9 +919,13 @@ class Assembler : public Malloced {
void pand(XMMRegister dst, XMMRegister src);
void pxor(XMMRegister dst, XMMRegister src);
void por(XMMRegister dst, XMMRegister src);
void ptest(XMMRegister dst, XMMRegister src);
// psllq and psrlq each come in two forms: one taking an immediate shift
// count (int8_t) and one taking the shift count from an XMM register.
void psllq(XMMRegister reg, int8_t shift);
void psllq(XMMRegister dst, XMMRegister src);
void psrlq(XMMRegister reg, int8_t shift);
void psrlq(XMMRegister dst, XMMRegister src);
void pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle);
void pextrd(const Operand& dst, XMMRegister src, int8_t offset);
......
......@@ -1182,15 +1182,33 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0xF3) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("psllq %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x73) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
int8_t imm8 = static_cast<int8_t>(data[1]);
AppendToBuffer("psllq %s,%d",
ASSERT(regop == esi || regop == edx);
AppendToBuffer("%s %s,%d",
(regop == esi) ? "psllq" : "psrlq",
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0xD3) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("psrlq %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x7F) {
AppendToBuffer("movdqa ");
data++;
......@@ -1228,6 +1246,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xEB) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("por %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else {
UnimplementedInstruction();
}
......
......@@ -2971,9 +2971,60 @@ void LCodeGen::DoDoubleToI(LDoubleToI* instr) {
__ add(Operand(esp), Immediate(kDoubleSize));
__ bind(&done);
} else {
// This will bail out if the input was not in the int32 range (or,
// unfortunately, if the input was 0x80000000).
DeoptimizeIf(equal, instr->environment());
NearLabel done;
Register temp_reg = ToRegister(instr->temporary());
XMMRegister xmm_scratch = xmm0;
// If cvttsd2si succeeded, we're done. Otherwise, we attempt
// manual conversion.
__ j(not_equal, &done);
// Get high 32 bits of the input in result_reg and temp_reg.
__ pshufd(xmm_scratch, input_reg, 1);
__ movd(Operand(temp_reg), xmm_scratch);
__ mov(result_reg, temp_reg);
// Prepare negation mask in temp_reg.
__ sar(temp_reg, kBitsPerInt - 1);
// Extract the exponent from result_reg and subtract adjusted
// bias from it. The adjustment is selected in a way such that
// when the difference is zero, the answer is in the low 32 bits
// of the input, otherwise a shift has to be performed.
__ shr(result_reg, HeapNumber::kExponentShift);
__ and_(result_reg,
HeapNumber::kExponentMask >> HeapNumber::kExponentShift);
__ sub(Operand(result_reg),
Immediate(HeapNumber::kExponentBias +
HeapNumber::kExponentBits +
HeapNumber::kMantissaBits));
// Don't handle big (> kMantissaBits + kExponentBits == 63) or
// special exponents.
DeoptimizeIf(greater, instr->environment());
// Zero out the sign and the exponent in the input (by shifting
// it to the left) and restore the implicit mantissa bit,
// i.e. convert the input to unsigned int64 shifted left by
// kExponentBits.
ExternalReference minus_zero = ExternalReference::address_of_minus_zero();
// Minus zero has the most significant bit set and the other
// bits cleared.
__ movdbl(xmm_scratch, Operand::StaticVariable(minus_zero));
__ psllq(input_reg, HeapNumber::kExponentBits);
__ por(input_reg, xmm_scratch);
// Get the amount to shift the input right in xmm_scratch.
__ neg(result_reg);
__ movd(xmm_scratch, Operand(result_reg));
// Shift the input right and extract low 32 bits.
__ psrlq(input_reg, xmm_scratch);
__ movd(Operand(result_reg), input_reg);
// Use the prepared mask in temp_reg to negate the result if necessary.
__ xor_(result_reg, Operand(temp_reg));
__ sub(result_reg, Operand(temp_reg));
__ bind(&done);
}
} else {
NearLabel done;
......
......@@ -1572,8 +1572,12 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) {
return AssignPointerMap(Define(result, result_temp));
} else {
ASSERT(to.IsInteger32());
LOperand* value = UseRegister(instr->value());
return AssignEnvironment(DefineAsRegister(new LDoubleToI(value)));
bool needs_temp = instr->CanTruncateToInt32() &&
!CpuFeatures::IsSupported(SSE3);
LOperand* value = needs_temp ?
UseTempRegister(instr->value()) : UseRegister(instr->value());
LOperand* temp = needs_temp ? TempRegister() : NULL;
return AssignEnvironment(DefineAsRegister(new LDoubleToI(value, temp)));
}
} else if (from.IsInteger32()) {
if (to.IsTagged()) {
......
......@@ -1481,12 +1481,17 @@ class LNumberTagD: public LUnaryOperation<1> {
// Sometimes truncating conversion from a double value to an int32. Whether
// the conversion truncates is decided by the hydrogen Change instruction
// (see truncating()).
class LDoubleToI: public LUnaryOperation<1> {
public:
// NOTE(review): this one-argument constructor does not initialize
// temporary_. It looks like the pre-change line of this diff, superseded by
// the two-argument constructor below -- confirm before relying on it.
explicit LDoubleToI(LOperand* value) : LUnaryOperation<1>(value) { }
// value: the operand to convert. temporary: scratch operand, or NULL when
// the chunk builder decides no temporary is needed.
LDoubleToI(LOperand* value, LOperand* temporary)
: LUnaryOperation<1>(value), temporary_(temporary) { }
DECLARE_CONCRETE_INSTRUCTION(DoubleToI, "double-to-i")
DECLARE_HYDROGEN_ACCESSOR(Change)
// True when the hydrogen instruction reports CanTruncateToInt32().
bool truncating() { return hydrogen()->CanTruncateToInt32(); }
// Scratch operand supplied at construction; may be NULL.
LOperand* temporary() const { return temporary_; }
private:
LOperand* temporary_;
};
......
......@@ -486,17 +486,21 @@ void ExternalReferenceTable::PopulateTable() {
UNCLASSIFIED,
36,
"LDoubleConstant::one_half");
Add(ExternalReference::address_of_negative_infinity().address(),
Add(ExternalReference::address_of_minus_zero().address(),
UNCLASSIFIED,
37,
"LDoubleConstant::minus_zero");
Add(ExternalReference::address_of_negative_infinity().address(),
UNCLASSIFIED,
38,
"LDoubleConstant::negative_infinity");
Add(ExternalReference::power_double_double_function().address(),
UNCLASSIFIED,
38,
39,
"power_double_double_function");
Add(ExternalReference::power_double_int_function().address(),
UNCLASSIFIED,
39,
40,
"power_double_int_function");
Add(ExternalReference::arguments_marker_location().address(),
UNCLASSIFIED,
......
......@@ -416,7 +416,7 @@ TEST(DisasmIa320) {
}
}
// andpd, cmpltsd, movaps, psllq.
// andpd, cmpltsd, movaps, psllq, psrlq, por.
{
if (CpuFeatures::IsSupported(SSE2)) {
CpuFeatures::Scope fscope(SSE2);
......@@ -431,6 +431,18 @@ TEST(DisasmIa320) {
__ psllq(xmm0, 17);
__ psllq(xmm1, 42);
__ psllq(xmm0, xmm1);
__ psllq(xmm1, xmm2);
__ psrlq(xmm0, 17);
__ psrlq(xmm1, 42);
__ psrlq(xmm0, xmm1);
__ psrlq(xmm1, xmm2);
__ por(xmm0, xmm1);
__ por(xmm1, xmm2);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment