Commit c23c244a authored by whesse@chromium.org's avatar whesse@chromium.org

Reapply change 5989, adding untagged double calls to Math.pow, with problem in...

Reapply change 5989, adding untagged double calls to Math.pow, with problem in generated code for TranscendentalCacheSSE2Stub (unguarded SSE 4.1 code) fixed.
Review URL: http://codereview.chromium.org/5726008

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6010 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent cd2f41f7
......@@ -1354,6 +1354,9 @@ LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) {
case kMathPowHalf:
Abort("MathPowHalf LUnaryMathOperation not implemented");
return NULL;
case kMathLog:
Abort("MathLog LUnaryMathOperation not implemented");
return NULL;
default:
UNREACHABLE();
return NULL;
......
......@@ -56,6 +56,7 @@ namespace internal {
V(FastNewContext) \
V(FastCloneShallowArray) \
V(TranscendentalCache) \
V(TranscendentalCacheSSE2) \
V(GenericUnaryOp) \
V(RevertToNumber) \
V(ToBoolean) \
......
......@@ -2054,8 +2054,9 @@ class TranscendentalCache {
// Allow access to the caches_ array as an ExternalReference.
friend class ExternalReference;
// Inline implementation of the caching.
// Inline implementation of the cache.
friend class TranscendentalCacheStub;
friend class TranscendentalCacheSSE2Stub;
static TranscendentalCache* caches_[kNumberOfCaches];
Element elements_[kCacheSize];
......
......@@ -1380,6 +1380,7 @@ class HUnaryMathOperation: public HUnaryOperation {
break;
case kMathSqrt:
case kMathPowHalf:
case kMathLog:
default:
set_representation(Representation::Double());
}
......@@ -1399,6 +1400,7 @@ class HUnaryMathOperation: public HUnaryOperation {
case kMathCeil:
case kMathSqrt:
case kMathPowHalf:
case kMathLog:
return Representation::Double();
break;
case kMathAbs:
......
......@@ -4087,6 +4087,7 @@ bool HGraphBuilder::TryMathFunctionInline(Call* expr) {
case kMathFloor:
case kMathAbs:
case kMathSqrt:
case kMathLog:
if (argument_count == 2) {
HValue* argument = Pop();
Drop(1); // Receiver.
......
......@@ -2409,6 +2409,7 @@ void Assembler::movsd(XMMRegister dst, const Operand& src) {
emit_sse_operand(dst, src);
}
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
......@@ -2431,6 +2432,17 @@ void Assembler::movd(XMMRegister dst, const Operand& src) {
}
void Assembler::movd(const Operand& dst, XMMRegister src) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0x66);
EMIT(0x0F);
EMIT(0x7E);
emit_sse_operand(src, dst);
}
void Assembler::pand(XMMRegister dst, XMMRegister src) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
......@@ -2465,7 +2477,7 @@ void Assembler::ptest(XMMRegister dst, XMMRegister src) {
}
void Assembler::psllq(XMMRegister reg, int8_t imm8) {
void Assembler::psllq(XMMRegister reg, int8_t shift) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
last_pc_ = pc_;
......@@ -2473,7 +2485,32 @@ void Assembler::psllq(XMMRegister reg, int8_t imm8) {
EMIT(0x0F);
EMIT(0x73);
emit_sse_operand(esi, reg); // esi == 6
EMIT(imm8);
EMIT(shift);
}
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0x66);
EMIT(0x0F);
EMIT(0x70);
emit_sse_operand(dst, src);
EMIT(shuffle);
}
void Assembler::pextrd(const Operand& dst, XMMRegister src, int8_t offset) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0x66);
EMIT(0x0F);
EMIT(0x3A);
EMIT(0x16);
emit_sse_operand(src, dst);
EMIT(offset);
}
......
......@@ -905,13 +905,16 @@ class Assembler : public Malloced {
void movdbl(const Operand& dst, XMMRegister src);
void movd(XMMRegister dst, const Operand& src);
void movd(const Operand& src, XMMRegister dst);
void movsd(XMMRegister dst, XMMRegister src);
void pand(XMMRegister dst, XMMRegister src);
void pxor(XMMRegister dst, XMMRegister src);
void ptest(XMMRegister dst, XMMRegister src);
void psllq(XMMRegister reg, int8_t imm8);
void psllq(XMMRegister reg, int8_t shift);
void pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle);
void pextrd(const Operand& dst, XMMRegister src, int8_t offset);
// Parallel XMM operations.
void movntdqa(XMMRegister src, const Operand& dst);
......
......@@ -2683,6 +2683,143 @@ void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm) {
}
void TranscendentalCacheSSE2Stub::Generate(MacroAssembler* masm) {
// Input on stack:
// esp[0]: return address.
// Input in registers:
// xmm1: untagged double input argument.
// Output:
// xmm1: untagged double result.
Label skip_cache;
Label call_runtime;
// Input is an untagged double in xmm1.
// Compute hash (the shifts are arithmetic):
// h = (low ^ high); h ^= h >> 16; h ^= h >> 8; h = h & (cacheSize - 1);
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatures::Scope sse4_scope(SSE4_1);
__ pextrd(Operand(edx), xmm1, 0x1); // copy xmm1[63..32] to edx.
} else {
__ pshufd(xmm0, xmm1, 0x1);
__ movd(Operand(edx), xmm0);
}
__ movd(Operand(ebx), xmm1);
// xmm1 = double value
// ebx = low 32 bits of double value
// edx = high 32 bits of double value
// Compute hash (the shifts are arithmetic):
// h = (low ^ high); h ^= h >> 16; h ^= h >> 8; h = h & (cacheSize - 1);
__ mov(ecx, ebx);
__ xor_(ecx, Operand(edx));
__ mov(eax, ecx);
__ sar(eax, 16);
__ xor_(ecx, Operand(eax));
__ mov(eax, ecx);
__ sar(eax, 8);
__ xor_(ecx, Operand(eax));
ASSERT(IsPowerOf2(TranscendentalCache::kCacheSize));
__ and_(Operand(ecx), Immediate(TranscendentalCache::kCacheSize - 1));
// xmm1 = double value.
// ebx = low 32 bits of double value.
// edx = high 32 bits of double value.
// ecx = TranscendentalCache::hash(double value).
__ mov(eax,
Immediate(ExternalReference::transcendental_cache_array_address()));
// Eax points to cache array.
__ mov(eax, Operand(eax, type_ * sizeof(TranscendentalCache::caches_[0])));
// Eax points to the cache for the type type_.
// If NULL, the cache hasn't been initialized yet, so go through runtime.
__ test(eax, Operand(eax));
__ j(zero, &call_runtime);
#ifdef DEBUG
// Check that the layout of cache elements match expectations.
{ TranscendentalCache::Element test_elem[2];
char* elem_start = reinterpret_cast<char*>(&test_elem[0]);
char* elem2_start = reinterpret_cast<char*>(&test_elem[1]);
char* elem_in0 = reinterpret_cast<char*>(&(test_elem[0].in[0]));
char* elem_in1 = reinterpret_cast<char*>(&(test_elem[0].in[1]));
char* elem_out = reinterpret_cast<char*>(&(test_elem[0].output));
CHECK_EQ(12, elem2_start - elem_start); // Two uint_32's and a pointer.
CHECK_EQ(0, elem_in0 - elem_start);
CHECK_EQ(kIntSize, elem_in1 - elem_start);
CHECK_EQ(2 * kIntSize, elem_out - elem_start);
}
#endif
// Find the address of the ecx'th entry in the cache, i.e., &eax[ecx*12].
__ lea(ecx, Operand(ecx, ecx, times_2, 0));
__ lea(ecx, Operand(eax, ecx, times_4, 0));
// Check if cache matches: Double value is stored in uint32_t[2] array.
NearLabel cache_miss;
__ cmp(ebx, Operand(ecx, 0));
__ j(not_equal, &cache_miss);
__ cmp(edx, Operand(ecx, kIntSize));
__ j(not_equal, &cache_miss);
// Cache hit!
__ mov(eax, Operand(ecx, 2 * kIntSize));
__ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));
__ Ret();
__ bind(&cache_miss);
// Update cache with new value.
// We are short on registers, so use no_reg as scratch.
// This gives slightly larger code.
__ AllocateHeapNumber(eax, edi, no_reg, &skip_cache);
__ sub(Operand(esp), Immediate(kDoubleSize));
__ movdbl(Operand(esp, 0), xmm1);
__ fld_d(Operand(esp, 0));
__ add(Operand(esp), Immediate(kDoubleSize));
GenerateOperation(masm);
__ mov(Operand(ecx, 0), ebx);
__ mov(Operand(ecx, kIntSize), edx);
__ mov(Operand(ecx, 2 * kIntSize), eax);
__ fstp_d(FieldOperand(eax, HeapNumber::kValueOffset));
__ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));
__ Ret();
__ bind(&skip_cache);
__ sub(Operand(esp), Immediate(kDoubleSize));
__ movdbl(Operand(esp, 0), xmm1);
__ fld_d(Operand(esp, 0));
GenerateOperation(masm);
__ fstp_d(Operand(esp, 0));
__ movdbl(xmm1, Operand(esp, 0));
__ add(Operand(esp), Immediate(kDoubleSize));
__ Ret();
__ bind(&call_runtime);
__ AllocateHeapNumber(eax, edi, no_reg, &skip_cache);
__ push(eax);
__ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm1);
__ CallRuntime(RuntimeFunction(), 1);
__ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));
__ Ret();
}
Runtime::FunctionId TranscendentalCacheSSE2Stub::RuntimeFunction() {
switch (type_) {
// Add more cases when necessary.
case TranscendentalCache::LOG: return Runtime::kMath_log;
default:
UNIMPLEMENTED();
return Runtime::kAbort;
}
}
void TranscendentalCacheSSE2Stub::GenerateOperation(MacroAssembler* masm) {
// Only free register is edi.
// Input value is on FP stack and in xmm1.
ASSERT(type_ == TranscendentalCache::LOG);
__ fldln2();
__ fxch();
__ fyl2x();
}
// Get the integer part of a heap number. Surprisingly, all this bit twiddling
// is faster than using the built-in instructions on floating point registers.
// Trashes edi and ebx. Dest is ecx. Source cannot be ecx or one of the
......
......@@ -45,6 +45,7 @@ class TranscendentalCacheStub: public CodeStub {
void Generate(MacroAssembler* masm);
private:
TranscendentalCache::Type type_;
Major MajorKey() { return TranscendentalCache; }
int MinorKey() { return type_; }
Runtime::FunctionId RuntimeFunction();
......@@ -52,6 +53,24 @@ class TranscendentalCacheStub: public CodeStub {
};
// Check the transcendental cache, or generate the result, using SSE2.
// The argument and result will be in xmm1.
// Only supports TranscendentalCache::LOG at this point.
class TranscendentalCacheSSE2Stub: public CodeStub {
public:
explicit TranscendentalCacheSSE2Stub(TranscendentalCache::Type type)
: type_(type) {}
void Generate(MacroAssembler* masm);
private:
TranscendentalCache::Type type_;
Major MajorKey() { return TranscendentalCacheSSE2; }
int MinorKey() { return type_; }
Runtime::FunctionId RuntimeFunction();
void GenerateOperation(MacroAssembler* masm);
};
class ToBooleanStub: public CodeStub {
public:
ToBooleanStub() { }
......
......@@ -1107,6 +1107,21 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
} else {
UnimplementedInstruction();
}
} else if (*data == 0x3A) {
data++;
if (*data == 0x16) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
int8_t imm8 = static_cast<int8_t>(data[1]);
AppendToBuffer("pextrd %s,%s,%d",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else {
UnimplementedInstruction();
}
} else if (*data == 0x2E || *data == 0x2F) {
const char* mnem = (*data == 0x2E) ? "ucomisd" : "comisd";
data++;
......@@ -1129,6 +1144,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfCPURegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x54) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("andpd %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x57) {
data++;
int mod, regop, rm;
......@@ -1149,6 +1172,25 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("movdqa %s,", NameOfXMMRegister(regop));
data += PrintRightOperand(data);
} else if (*data == 0x70) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
int8_t imm8 = static_cast<int8_t>(data[1]);
AppendToBuffer("pshufd %s,%s,%d",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0x73) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
int8_t imm8 = static_cast<int8_t>(data[1]);
AppendToBuffer("psllq %s,%d",
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0x7F) {
AppendToBuffer("movdqa ");
data++;
......@@ -1156,6 +1198,21 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
get_modrm(*data, &mod, &regop, &rm);
data += PrintRightOperand(data);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
} else if (*data == 0x7E) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("movd ");
data += PrintRightOperand(data);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
} else if (*data == 0xDB) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pand %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xE7) {
AppendToBuffer("movntdq ");
data++;
......@@ -1164,38 +1221,13 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
data += PrintRightOperand(data);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
} else if (*data == 0xEF) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pxor %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xDB) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pand %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x73) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
int8_t imm8 = static_cast<int8_t>(data[1]);
AppendToBuffer("psllq %s,%d",
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0x54) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("andpd %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pxor %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else {
UnimplementedInstruction();
}
......
......@@ -2235,6 +2235,13 @@ void LCodeGen::DoPower(LPower* instr) {
}
void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
ASSERT(ToDoubleRegister(instr->result()).is(xmm1));
TranscendentalCacheSSE2Stub stub(TranscendentalCache::LOG);
CallCode(stub.GetCode(), RelocInfo::CODE_TARGET, instr);
}
void LCodeGen::DoUnaryMathOperation(LUnaryMathOperation* instr) {
switch (instr->op()) {
case kMathAbs:
......@@ -2252,6 +2259,10 @@ void LCodeGen::DoUnaryMathOperation(LUnaryMathOperation* instr) {
case kMathPowHalf:
DoMathPowHalf(instr);
break;
case kMathLog:
DoMathLog(instr);
break;
default:
UNREACHABLE();
}
......
......@@ -176,6 +176,7 @@ class LCodeGen BASE_EMBEDDED {
void DoMathRound(LUnaryMathOperation* instr);
void DoMathSqrt(LUnaryMathOperation* instr);
void DoMathPowHalf(LUnaryMathOperation* instr);
void DoMathLog(LUnaryMathOperation* instr);
// Support for recording safepoint and position information.
void RecordSafepoint(LPointerMap* pointers, int deoptimization_index);
......
......@@ -1355,22 +1355,28 @@ LInstruction* LChunkBuilder::DoCallConstantFunction(
LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) {
MathFunctionId op = instr->op();
LOperand* input = UseRegisterAtStart(instr->value());
LInstruction* result = new LUnaryMathOperation(input);
switch (op) {
case kMathAbs:
return AssignEnvironment(AssignPointerMap(DefineSameAsFirst(result)));
case kMathFloor:
return AssignEnvironment(DefineAsRegister(result));
case kMathRound:
return AssignEnvironment(DefineAsRegister(result));
case kMathSqrt:
return DefineSameAsFirst(result);
case kMathPowHalf:
return AssignEnvironment(DefineSameAsFirst(result));
default:
UNREACHABLE();
return NULL;
if (op == kMathLog) {
LOperand* input = UseFixedDouble(instr->value(), xmm1);
LInstruction* result = new LUnaryMathOperation(input);
return MarkAsCall(DefineFixedDouble(result, xmm1), instr);
} else {
LOperand* input = UseRegisterAtStart(instr->value());
LInstruction* result = new LUnaryMathOperation(input);
switch (op) {
case kMathAbs:
return AssignEnvironment(AssignPointerMap(DefineSameAsFirst(result)));
case kMathFloor:
return AssignEnvironment(DefineAsRegister(result));
case kMathRound:
return AssignEnvironment(DefineAsRegister(result));
case kMathSqrt:
return DefineSameAsFirst(result);
case kMathPowHalf:
return AssignEnvironment(DefineSameAsFirst(result));
default:
UNREACHABLE();
return NULL;
}
}
}
......
......@@ -265,6 +265,7 @@ function SetupMath() {
%SetMathFunctionId($Math.abs, 4);
%SetMathFunctionId($Math.sqrt, 0xd);
%SetMathFunctionId($Math.pow, 0xe);
%SetMathFunctionId($Math.log, 5);
// TODO(erikcorry): Set the id of the other functions so they can be
// optimized.
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment