Commit c3ff68b6 authored by mbrandy, committed by Commit bot

Detect cache line size on Linux for PPC hosts.

In the interest of generalization, this change:
- Consolidates cache line size detection for all interested
  architectures under base::CPU (currently leveraged by only
  PPC and ARM64).
- Differentiates between instruction and data cache line sizes.

R=rmcilroy@chromium.org, jochen@chromium.org, joransiu@ca.ibm.com, jyan@ca.ibm.com, michael_dawson@ca.ibm.com
BUG=

Review URL: https://codereview.chromium.org/1643363002

Cr-Commit-Position: refs/heads/master@{#33642}
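
On Linux/PPC the kernel reports both cache block sizes through the ELF auxiliary vector, which is what this change taps into. As a standalone illustration (not V8 code), the same values can be queried with glibc's getauxval; this sketch assumes getauxval is available (glibc 2.16+) and that the AT_ICACHEBSIZE/AT_DCACHEBSIZE constants are provided by <elf.h> on the target.

// Standalone sketch: query PowerPC cache block sizes from the ELF auxiliary
// vector. Not part of this commit; V8 itself parses /proc/self/auxv (see the
// base/cpu.cc hunk below) rather than calling getauxval.
#include <elf.h>       // AT_ICACHEBSIZE / AT_DCACHEBSIZE
#include <sys/auxv.h>  // getauxval (glibc >= 2.16)
#include <cstdio>

int main() {
  // getauxval returns 0 when the kernel did not supply the entry, which
  // lines up with the UNKNOWN_CACHE_LINE_SIZE fallback introduced here.
  unsigned long icache = getauxval(AT_ICACHEBSIZE);
  unsigned long dcache = getauxval(AT_DCACHEBSIZE);
  std::printf("icache line: %lu bytes, dcache line: %lu bytes\n", icache,
              dcache);
  return 0;
}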
parent 8420d575
@@ -82,7 +82,7 @@ static unsigned CpuFeaturesImpliedByCompiler() {
 void CpuFeatures::ProbeImpl(bool cross_compile) {
   supported_ |= CpuFeaturesImpliedByCompiler();
-  cache_line_size_ = 64;
+  dcache_line_size_ = 64;
   // Only use statically determined features for cross compile (snapshot).
   if (cross_compile) return;
@@ -137,7 +137,7 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
   if (cpu.implementer() == base::CPU::ARM &&
       (cpu.part() == base::CPU::ARM_CORTEX_A5 ||
        cpu.part() == base::CPU::ARM_CORTEX_A9)) {
-    cache_line_size_ = 32;
+    dcache_line_size_ = 32;
   }
   if (FLAG_enable_32dregs && cpu.has_vfp3_d32()) supported_ |= 1u << VFP32DREGS;
......
@@ -108,23 +108,23 @@ MemCopyUint8Function CreateMemCopyUint8Function(Isolate* isolate,
     __ b(lt, &size_less_than_8);
     __ cmp(chars, Operand(32));
     __ b(lt, &less_32);
-    if (CpuFeatures::cache_line_size() == 32) {
+    if (CpuFeatures::dcache_line_size() == 32) {
       __ pld(MemOperand(src, 32));
     }
     __ cmp(chars, Operand(64));
     __ b(lt, &less_64);
     __ pld(MemOperand(src, 64));
-    if (CpuFeatures::cache_line_size() == 32) {
+    if (CpuFeatures::dcache_line_size() == 32) {
       __ pld(MemOperand(src, 96));
     }
     __ cmp(chars, Operand(128));
     __ b(lt, &less_128);
     __ pld(MemOperand(src, 128));
-    if (CpuFeatures::cache_line_size() == 32) {
+    if (CpuFeatures::dcache_line_size() == 32) {
      __ pld(MemOperand(src, 160));
     }
     __ pld(MemOperand(src, 192));
-    if (CpuFeatures::cache_line_size() == 32) {
+    if (CpuFeatures::dcache_line_size() == 32) {
       __ pld(MemOperand(src, 224));
     }
     __ cmp(chars, Operand(256));
@@ -134,7 +134,7 @@ MemCopyUint8Function CreateMemCopyUint8Function(Isolate* isolate,
     __ bind(&loop);
     __ pld(MemOperand(src, 256));
     __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
-    if (CpuFeatures::cache_line_size() == 32) {
+    if (CpuFeatures::dcache_line_size() == 32) {
       __ pld(MemOperand(src, 256));
     }
     __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
......
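
The ARM copy stub above spaces its pld prefetches one data-cache line ahead of the copy cursor, and the extra prefetches are only emitted when lines are 32 bytes instead of 64. The sketch below illustrates the same pattern in plain C++ using the GCC/Clang __builtin_prefetch builtin; copy_with_prefetch is a hypothetical helper, not the V8 stub.

// Sketch of the prefetch pattern the stub above generates: prefetch one line
// ahead of the copy cursor, with the stride taken from the detected data
// cache line size. On 32-byte-line cores the prefetch fires twice as often.
#include <cstddef>
#include <cstdint>

void copy_with_prefetch(uint8_t* dst, const uint8_t* src, size_t n,
                        size_t dcache_line) {
  size_t i = 0;
  for (; i + dcache_line <= n; i += dcache_line) {
    // Read prefetch with low temporal locality for the next iteration's line.
    __builtin_prefetch(src + i + dcache_line, /*rw=*/0, /*locality=*/1);
    for (size_t j = 0; j < dcache_line; ++j) dst[i + j] = src[i + j];
  }
  for (; i < n; ++i) dst[i] = src[i];  // copy the tail bytes
}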
@@ -58,6 +58,9 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
       cpu.part() <= base::CPU::NVIDIA_DENVER_V10) {
     supported_ |= 1u << COHERENT_CACHE;
   }
+
+  icache_line_size_ = cpu.icache_line_size();
+  dcache_line_size_ = cpu.dcache_line_size();
 }
......
@@ -12,32 +12,6 @@
 namespace v8 {
 namespace internal {
 
-class CacheLineSizes {
- public:
-  CacheLineSizes() {
-#ifdef USE_SIMULATOR
-    cache_type_register_ = 0;
-#else
-    // Copy the content of the cache type register to a core register.
-    __asm__ __volatile__ ("mrs %[ctr], ctr_el0"  // NOLINT
-                          : [ctr] "=r" (cache_type_register_));
-#endif
-  }
-
-  uint32_t icache_line_size() const { return ExtractCacheLineSize(0); }
-  uint32_t dcache_line_size() const { return ExtractCacheLineSize(16); }
-
- private:
-  uint32_t ExtractCacheLineSize(int cache_line_size_shift) const {
-    // The cache type register holds the size of cache lines in words as a
-    // power of two.
-    return 4 << ((cache_type_register_ >> cache_line_size_shift) & 0xf);
-  }
-
-  uint32_t cache_type_register_;
-};
-
 void CpuFeatures::FlushICache(void* address, size_t length) {
 #ifdef V8_HOST_ARCH_ARM64
   // The code below assumes user space cache operations are allowed. The goal
@@ -46,9 +20,8 @@ void CpuFeatures::FlushICache(void* address, size_t length) {
   uintptr_t start = reinterpret_cast<uintptr_t>(address);
   // Sizes will be used to generate a mask big enough to cover a pointer.
-  CacheLineSizes sizes;
-  uintptr_t dsize = sizes.dcache_line_size();
-  uintptr_t isize = sizes.icache_line_size();
+  uintptr_t dsize = CpuFeatures::dcache_line_size();
+  uintptr_t isize = CpuFeatures::icache_line_size();
   // Cache line sizes are always a power of 2.
   DCHECK(CountSetBits(dsize, 64) == 1);
   DCHECK(CountSetBits(isize, 64) == 1);
......
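
The CacheLineSizes helper removed here (and re-added under base::CPU below) reads CTR_EL0, whose IminLine field (bits 3:0) and DminLine field (bits 19:16) encode each line size as log2 of the size in 4-byte words, so a field value of 4 decodes to 4 << 4 = 64 bytes. A small standalone decode of a hypothetical register value, using the same arithmetic as ExtractCacheLineSize (the mrs read itself requires an AArch64 host):

// Decode a hypothetical CTR_EL0 value the way ExtractCacheLineSize does:
// each 4-bit field holds log2 of the line size measured in 4-byte words.
#include <cstdint>
#include <cstdio>

static uint32_t LineSizeBytes(uint32_t ctr, int shift) {
  return 4u << ((ctr >> shift) & 0xf);
}

int main() {
  const uint32_t ctr = 0x00040004;  // hypothetical: IminLine = DminLine = 4
  std::printf("icache line: %u bytes\n", LineSizeBytes(ctr, 0));   // 64
  std::printf("dcache line: %u bytes\n", LineSizeBytes(ctr, 16));  // 64
  return 0;
}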
@@ -264,8 +264,8 @@ CpuFeatureScope::~CpuFeatureScope() {
 bool CpuFeatures::initialized_ = false;
 unsigned CpuFeatures::supported_ = 0;
-unsigned CpuFeatures::cache_line_size_ = 0;
+unsigned CpuFeatures::icache_line_size_ = 0;
+unsigned CpuFeatures::dcache_line_size_ = 0;
 
 // -----------------------------------------------------------------------------
 // Implementation of Label
......
@@ -223,9 +223,14 @@ class CpuFeatures : public AllStatic {
   static inline bool SupportsCrankshaft();
 
-  static inline unsigned cache_line_size() {
-    DCHECK(cache_line_size_ != 0);
-    return cache_line_size_;
+  static inline unsigned icache_line_size() {
+    DCHECK(icache_line_size_ != 0);
+    return icache_line_size_;
+  }
+
+  static inline unsigned dcache_line_size() {
+    DCHECK(dcache_line_size_ != 0);
+    return dcache_line_size_;
   }
 
   static void PrintTarget();
@@ -241,7 +246,8 @@ class CpuFeatures : public AllStatic {
   static void ProbeImpl(bool cross_compile);
   static unsigned supported_;
-  static unsigned cache_line_size_;
+  static unsigned icache_line_size_;
+  static unsigned dcache_line_size_;
   static bool initialized_;
   DISALLOW_COPY_AND_ASSIGN(CpuFeatures);
 };
......
@@ -77,6 +77,33 @@ static V8_INLINE void __cpuid(int cpu_info[4], int info_type) {
 #elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 \
     || V8_HOST_ARCH_MIPS || V8_HOST_ARCH_MIPS64
 
+#if V8_HOST_ARCH_ARM64
+class CacheLineSizes {
+ public:
+  CacheLineSizes() {
+#ifdef USE_SIMULATOR
+    cache_type_register_ = 0;
+#else
+    // Copy the content of the cache type register to a core register.
+    __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT
+                         : [ctr] "=r"(cache_type_register_));
+#endif
+  }
+
+  uint32_t icache_line_size() const { return ExtractCacheLineSize(0); }
+  uint32_t dcache_line_size() const { return ExtractCacheLineSize(16); }
+
+ private:
+  uint32_t ExtractCacheLineSize(int cache_line_size_shift) const {
+    // The cache type register holds the size of cache lines in words as a
+    // power of two.
+    return 4 << ((cache_type_register_ >> cache_line_size_shift) & 0xf);
+  }
+
+  uint32_t cache_type_register_;
+};
+#endif  // V8_HOST_ARCH_ARM64
+
 #if V8_OS_LINUX
 
 #if V8_HOST_ARCH_ARM
@@ -312,6 +339,8 @@ CPU::CPU()
       architecture_(0),
       variant_(-1),
       part_(0),
+      icache_line_size_(UNKNOWN_CACHE_LINE_SIZE),
+      dcache_line_size_(UNKNOWN_CACHE_LINE_SIZE),
       has_fpu_(false),
       has_cmov_(false),
       has_sahf_(false),
@@ -626,6 +655,10 @@ CPU::CPU()
     delete[] part;
   }
+
+  CacheLineSizes sizes;
+  icache_line_size_ = sizes.icache_line_size();
+  dcache_line_size_ = sizes.dcache_line_size();
 #elif V8_HOST_ARCH_PPC
 
 #ifndef USE_SIMULATOR
@@ -644,9 +677,16 @@ CPU::CPU()
       if (n == 0 || entry.a_type == AT_NULL) {
         break;
       }
-      if (entry.a_type == AT_PLATFORM) {
-        auxv_cpu_type = reinterpret_cast<char*>(entry.a_un.a_val);
-        break;
+      switch (entry.a_type) {
+        case AT_PLATFORM:
+          auxv_cpu_type = reinterpret_cast<char*>(entry.a_un.a_val);
+          break;
+        case AT_ICACHEBSIZE:
+          icache_line_size_ = entry.a_un.a_val;
+          break;
+        case AT_DCACHEBSIZE:
+          dcache_line_size_ = entry.a_un.a_val;
+          break;
       }
     }
     fclose(fp);
......
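
The hunk above shows only the switch; the surrounding loop that opens /proc/self/auxv and pulls fixed-size entries is elided. A self-contained sketch of the same approach, assuming a 64-bit Linux target (Elf64_auxv_t) and the AT_*CACHEBSIZE constants from <elf.h>; this is illustrative, not V8's exact code:

// Sketch: scan /proc/self/auxv for the PowerPC cache block size entries,
// following the same pattern as the elided loop around the switch above.
#include <elf.h>
#include <cstdio>

int main() {
  unsigned long icache_line = 0, dcache_line = 0;  // 0 == unknown
  FILE* fp = std::fopen("/proc/self/auxv", "r");
  if (fp != nullptr) {
    Elf64_auxv_t entry;
    while (std::fread(&entry, sizeof(entry), 1, fp) == 1 &&
           entry.a_type != AT_NULL) {
      if (entry.a_type == AT_ICACHEBSIZE) icache_line = entry.a_un.a_val;
      if (entry.a_type == AT_DCACHEBSIZE) dcache_line = entry.a_un.a_val;
    }
    std::fclose(fp);
  }
  std::printf("icache line: %lu, dcache line: %lu\n", icache_line, dcache_line);
  return 0;
}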
@@ -75,6 +75,9 @@ class CPU final {
 
   // General features
   bool has_fpu() const { return has_fpu_; }
+  int icache_line_size() const { return icache_line_size_; }
+  int dcache_line_size() const { return dcache_line_size_; }
+  static const int UNKNOWN_CACHE_LINE_SIZE = 0;
 
   // x86 features
   bool has_cmov() const { return has_cmov_; }
@@ -118,6 +121,8 @@ class CPU final {
   int architecture_;
   int variant_;
   int part_;
+  int icache_line_size_;
+  int dcache_line_size_;
   bool has_fpu_;
   bool has_cmov_;
   bool has_sahf_;
......
@@ -55,7 +55,7 @@ static unsigned CpuFeaturesImpliedByCompiler() {
 void CpuFeatures::ProbeImpl(bool cross_compile) {
   supported_ |= CpuFeaturesImpliedByCompiler();
-  cache_line_size_ = 128;
+  icache_line_size_ = 128;
   // Only use statically determined features for cross compile (snapshot).
   if (cross_compile) return;
@@ -85,6 +85,9 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
     // Assume support
     supported_ |= (1u << FPU);
   }
+  if (cpu.icache_line_size() != base::CPU::UNKNOWN_CACHE_LINE_SIZE) {
+    icache_line_size_ = cpu.icache_line_size();
+  }
 #elif V8_OS_AIX
   // Assume support FP support and default cache line size
   supported_ |= (1u << FPU);
......
@@ -25,7 +25,7 @@ void CpuFeatures::FlushICache(void* buffer, size_t size) {
     return;
   }
 
-  const int kCacheLineSize = CpuFeatures::cache_line_size();
+  const int kCacheLineSize = CpuFeatures::icache_line_size();
   intptr_t mask = kCacheLineSize - 1;
   byte *start =
       reinterpret_cast<byte *>(reinterpret_cast<intptr_t>(buffer) & ~mask);
......
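
In this flush path the detected instruction cache line size feeds kCacheLineSize, the start address is masked down to a line boundary, and the remainder of the function (elided here) walks the range line by line. A hedged sketch of that kind of loop with GCC inline assembly follows; FlushICacheSketch is a hypothetical helper, it needs a PowerPC host, and the exact barrier placement is illustrative rather than a copy of V8's code:

// Sketch (not V8's exact code): flush each cache line covering
// [buffer, buffer + size) on PowerPC, then publish with sync/icbi/isync.
#include <cstddef>
#include <cstdint>

void FlushICacheSketch(void* buffer, size_t size, int cache_line_size) {
  const intptr_t mask = cache_line_size - 1;
  uint8_t* start =
      reinterpret_cast<uint8_t*>(reinterpret_cast<intptr_t>(buffer) & ~mask);
  uint8_t* end = reinterpret_cast<uint8_t*>(buffer) + size;
  for (uint8_t* p = start; p < end; p += cache_line_size) {
    __asm__ __volatile__("dcbf 0, %0" : : "r"(p) : "memory");  // flush data line
  }
  __asm__ __volatile__("sync" ::: "memory");
  for (uint8_t* p = start; p < end; p += cache_line_size) {
    __asm__ __volatile__("icbi 0, %0" : : "r"(p) : "memory");  // invalidate icache line
  }
  __asm__ __volatile__("isync" ::: "memory");
}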
@@ -3173,41 +3173,6 @@ void MacroAssembler::CallCFunctionHelper(Register function,
 }
 
-void MacroAssembler::FlushICache(Register address, size_t size,
-                                 Register scratch) {
-  if (CpuFeatures::IsSupported(INSTR_AND_DATA_CACHE_COHERENCY)) {
-    sync();
-    icbi(r0, address);
-    isync();
-    return;
-  }
-
-  Label done;
-
-  dcbf(r0, address);
-  sync();
-  icbi(r0, address);
-  isync();
-
-  // This code handles ranges which cross a single cacheline boundary.
-  // scratch is last cacheline which intersects range.
-  const int kCacheLineSizeLog2 = WhichPowerOf2(CpuFeatures::cache_line_size());
-
-  DCHECK(size > 0 && size <= (size_t)(1 << kCacheLineSizeLog2));
-
-  addi(scratch, address, Operand(size - 1));
-  ClearRightImm(scratch, scratch, Operand(kCacheLineSizeLog2));
-  cmpl(scratch, address);
-  ble(&done);
-
-  dcbf(r0, scratch);
-  sync();
-  icbi(r0, scratch);
-  isync();
-
-  bind(&done);
-}
-
 void MacroAssembler::DecodeConstantPoolOffset(Register result,
                                               Register location) {
   Label overflow_access, done;
......
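
The removed MacroAssembler::FlushICache only handled ranges no larger than one cache line, but such a range can still straddle a line boundary; scratch ends up holding the start of the last line the range touches, and the second flush is skipped when it matches the first. A plain C++ sketch of that address arithmetic with hypothetical values, mirroring what ClearRightImm's clear-the-low-bits behavior computes:

// Sketch of the boundary check in the removed helper: for a range no larger
// than one cache line, compute the line containing the last byte; if it
// differs from the line containing the first byte, both lines need flushing.
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const uintptr_t kCacheLineSizeLog2 = 7;  // 128-byte lines, hypothetical
  const uintptr_t line_mask = ~((uintptr_t{1} << kCacheLineSizeLog2) - 1);
  const uintptr_t address = 0x10f0;  // hypothetical start of the range
  const size_t size = 64;            // <= cache line size
  const uintptr_t first_line = address & line_mask;              // 0x1080
  const uintptr_t last_line = (address + size - 1) & line_mask;  // 0x1100
  std::printf("straddles a line boundary: %s\n",
              last_line != first_line ? "yes" : "no");
  return 0;
}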