Commit c3ff68b6 authored by mbrandy, committed by Commit bot

Detect cache line size on Linux for PPC hosts.

In the interest of generalization, this change:
- Consolidates cache line size detection for all interested
  architectures under base::CPU (currently leveraged by only
  PPC and ARM64).
- Differentiates between instruction vs data cache line sizes.

R=rmcilroy@chromium.org, jochen@chromium.org, joransiu@ca.ibm.com, jyan@ca.ibm.com, michael_dawson@ca.ibm.com
BUG=

Review URL: https://codereview.chromium.org/1643363002

Cr-Commit-Position: refs/heads/master@{#33642}
parent 8420d575
......@@ -82,7 +82,7 @@ static unsigned CpuFeaturesImpliedByCompiler() {
void CpuFeatures::ProbeImpl(bool cross_compile) {
supported_ |= CpuFeaturesImpliedByCompiler();
cache_line_size_ = 64;
dcache_line_size_ = 64;
// Only use statically determined features for cross compile (snapshot).
if (cross_compile) return;
......@@ -137,7 +137,7 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
if (cpu.implementer() == base::CPU::ARM &&
(cpu.part() == base::CPU::ARM_CORTEX_A5 ||
cpu.part() == base::CPU::ARM_CORTEX_A9)) {
cache_line_size_ = 32;
dcache_line_size_ = 32;
}
if (FLAG_enable_32dregs && cpu.has_vfp3_d32()) supported_ |= 1u << VFP32DREGS;
......
......@@ -108,23 +108,23 @@ MemCopyUint8Function CreateMemCopyUint8Function(Isolate* isolate,
__ b(lt, &size_less_than_8);
__ cmp(chars, Operand(32));
__ b(lt, &less_32);
if (CpuFeatures::cache_line_size() == 32) {
if (CpuFeatures::dcache_line_size() == 32) {
__ pld(MemOperand(src, 32));
}
__ cmp(chars, Operand(64));
__ b(lt, &less_64);
__ pld(MemOperand(src, 64));
if (CpuFeatures::cache_line_size() == 32) {
if (CpuFeatures::dcache_line_size() == 32) {
__ pld(MemOperand(src, 96));
}
__ cmp(chars, Operand(128));
__ b(lt, &less_128);
__ pld(MemOperand(src, 128));
if (CpuFeatures::cache_line_size() == 32) {
if (CpuFeatures::dcache_line_size() == 32) {
__ pld(MemOperand(src, 160));
}
__ pld(MemOperand(src, 192));
if (CpuFeatures::cache_line_size() == 32) {
if (CpuFeatures::dcache_line_size() == 32) {
__ pld(MemOperand(src, 224));
}
__ cmp(chars, Operand(256));
......@@ -134,7 +134,7 @@ MemCopyUint8Function CreateMemCopyUint8Function(Isolate* isolate,
__ bind(&loop);
__ pld(MemOperand(src, 256));
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
if (CpuFeatures::cache_line_size() == 32) {
if (CpuFeatures::dcache_line_size() == 32) {
__ pld(MemOperand(src, 256));
}
__ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
......
......@@ -58,6 +58,9 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
cpu.part() <= base::CPU::NVIDIA_DENVER_V10) {
supported_ |= 1u << COHERENT_CACHE;
}
icache_line_size_ = cpu.icache_line_size();
dcache_line_size_ = cpu.dcache_line_size();
}
......
......@@ -12,32 +12,6 @@
namespace v8 {
namespace internal {
// Captures the ARM64 cache type register (ctr_el0) at construction time and
// decodes the instruction and data cache line sizes from it.
class CacheLineSizes {
public:
CacheLineSizes() {
#ifdef USE_SIMULATOR
// The register is not read in simulator builds; with a zero value both
// accessors below evaluate to 4 << 0 == 4.
cache_type_register_ = 0;
#else
// Copy the content of the cache type register to a core register.
__asm__ __volatile__ ("mrs %[ctr], ctr_el0" // NOLINT
: [ctr] "=r" (cache_type_register_));
#endif
}
// Instruction cache line size, decoded from the 4-bit field at bit 0.
uint32_t icache_line_size() const { return ExtractCacheLineSize(0); }
// Data cache line size, decoded from the 4-bit field at bit 16.
uint32_t dcache_line_size() const { return ExtractCacheLineSize(16); }
private:
// Extracts the 4-bit field starting at |cache_line_size_shift| and converts
// it to a size by shifting 4 left by the field value.
uint32_t ExtractCacheLineSize(int cache_line_size_shift) const {
// The cache type register holds the size of cache lines in words as a
// power of two.
return 4 << ((cache_type_register_ >> cache_line_size_shift) & 0xf);
}
// Raw cache type register value (0 when USE_SIMULATOR is defined).
uint32_t cache_type_register_;
};
void CpuFeatures::FlushICache(void* address, size_t length) {
#ifdef V8_HOST_ARCH_ARM64
// The code below assumes user space cache operations are allowed. The goal
......@@ -46,9 +20,8 @@ void CpuFeatures::FlushICache(void* address, size_t length) {
uintptr_t start = reinterpret_cast<uintptr_t>(address);
// Sizes will be used to generate a mask big enough to cover a pointer.
CacheLineSizes sizes;
uintptr_t dsize = sizes.dcache_line_size();
uintptr_t isize = sizes.icache_line_size();
uintptr_t dsize = CpuFeatures::dcache_line_size();
uintptr_t isize = CpuFeatures::icache_line_size();
// Cache line sizes are always a power of 2.
DCHECK(CountSetBits(dsize, 64) == 1);
DCHECK(CountSetBits(isize, 64) == 1);
......
......@@ -264,8 +264,8 @@ CpuFeatureScope::~CpuFeatureScope() {
bool CpuFeatures::initialized_ = false;
unsigned CpuFeatures::supported_ = 0;
unsigned CpuFeatures::cache_line_size_ = 0;
unsigned CpuFeatures::icache_line_size_ = 0;
unsigned CpuFeatures::dcache_line_size_ = 0;
// -----------------------------------------------------------------------------
// Implementation of Label
......
......@@ -223,9 +223,14 @@ class CpuFeatures : public AllStatic {
static inline bool SupportsCrankshaft();
static inline unsigned cache_line_size() {
DCHECK(cache_line_size_ != 0);
return cache_line_size_;
// Returns the stored instruction cache line size. The DCHECK guards against
// reading the value while it is still zero (its static initial value).
static inline unsigned icache_line_size() {
DCHECK(icache_line_size_ != 0);
return icache_line_size_;
}
// Returns the stored data cache line size. The DCHECK guards against reading
// the value while it is still zero (its static initial value).
static inline unsigned dcache_line_size() {
DCHECK(dcache_line_size_ != 0);
return dcache_line_size_;
}
static void PrintTarget();
......@@ -241,7 +246,8 @@ class CpuFeatures : public AllStatic {
static void ProbeImpl(bool cross_compile);
static unsigned supported_;
static unsigned cache_line_size_;
static unsigned icache_line_size_;
static unsigned dcache_line_size_;
static bool initialized_;
DISALLOW_COPY_AND_ASSIGN(CpuFeatures);
};
......
......@@ -77,6 +77,33 @@ static V8_INLINE void __cpuid(int cpu_info[4], int info_type) {
#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 \
|| V8_HOST_ARCH_MIPS || V8_HOST_ARCH_MIPS64
#if V8_HOST_ARCH_ARM64
// Captures the ARM64 cache type register (ctr_el0) at construction time and
// decodes the instruction and data cache line sizes from it.
class CacheLineSizes {
public:
CacheLineSizes() {
#ifdef USE_SIMULATOR
// The register is not read in simulator builds; with a zero value both
// accessors below evaluate to 4 << 0 == 4.
cache_type_register_ = 0;
#else
// Copy the content of the cache type register to a core register.
__asm__ __volatile__("mrs %[ctr], ctr_el0" // NOLINT
: [ctr] "=r"(cache_type_register_));
#endif
}
// Instruction cache line size, decoded from the 4-bit field at bit 0.
uint32_t icache_line_size() const { return ExtractCacheLineSize(0); }
// Data cache line size, decoded from the 4-bit field at bit 16.
uint32_t dcache_line_size() const { return ExtractCacheLineSize(16); }
private:
// Extracts the 4-bit field starting at |cache_line_size_shift| and converts
// it to a size by shifting 4 left by the field value.
uint32_t ExtractCacheLineSize(int cache_line_size_shift) const {
// The cache type register holds the size of cache lines in words as a
// power of two.
return 4 << ((cache_type_register_ >> cache_line_size_shift) & 0xf);
}
// Raw cache type register value (0 when USE_SIMULATOR is defined).
uint32_t cache_type_register_;
};
#endif // V8_HOST_ARCH_ARM64
#if V8_OS_LINUX
#if V8_HOST_ARCH_ARM
......@@ -312,6 +339,8 @@ CPU::CPU()
architecture_(0),
variant_(-1),
part_(0),
icache_line_size_(UNKNOWN_CACHE_LINE_SIZE),
dcache_line_size_(UNKNOWN_CACHE_LINE_SIZE),
has_fpu_(false),
has_cmov_(false),
has_sahf_(false),
......@@ -626,6 +655,10 @@ CPU::CPU()
delete[] part;
}
// Decode both cache line sizes from the ARM64 cache type register. Each
// member must be fed by the accessor of the same name: the instruction and
// data line sizes may differ, and the ARM64 FlushICache derives its
// alignment masks from these values.
CacheLineSizes sizes;
icache_line_size_ = sizes.icache_line_size();
dcache_line_size_ = sizes.dcache_line_size();
#elif V8_HOST_ARCH_PPC
#ifndef USE_SIMULATOR
......@@ -644,9 +677,16 @@ CPU::CPU()
if (n == 0 || entry.a_type == AT_NULL) {
break;
}
if (entry.a_type == AT_PLATFORM) {
auxv_cpu_type = reinterpret_cast<char*>(entry.a_un.a_val);
break;
switch (entry.a_type) {
case AT_PLATFORM:
auxv_cpu_type = reinterpret_cast<char*>(entry.a_un.a_val);
break;
case AT_ICACHEBSIZE:
icache_line_size_ = entry.a_un.a_val;
break;
case AT_DCACHEBSIZE:
dcache_line_size_ = entry.a_un.a_val;
break;
}
}
fclose(fp);
......
......@@ -75,6 +75,9 @@ class CPU final {
// General features
bool has_fpu() const { return has_fpu_; }
int icache_line_size() const { return icache_line_size_; }
int dcache_line_size() const { return dcache_line_size_; }
static const int UNKNOWN_CACHE_LINE_SIZE = 0;
// x86 features
bool has_cmov() const { return has_cmov_; }
......@@ -118,6 +121,8 @@ class CPU final {
int architecture_;
int variant_;
int part_;
int icache_line_size_;
int dcache_line_size_;
bool has_fpu_;
bool has_cmov_;
bool has_sahf_;
......
......@@ -55,7 +55,7 @@ static unsigned CpuFeaturesImpliedByCompiler() {
void CpuFeatures::ProbeImpl(bool cross_compile) {
supported_ |= CpuFeaturesImpliedByCompiler();
cache_line_size_ = 128;
icache_line_size_ = 128;
// Only use statically determined features for cross compile (snapshot).
if (cross_compile) return;
......@@ -85,6 +85,9 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
// Assume support
supported_ |= (1u << FPU);
}
if (cpu.icache_line_size() != base::CPU::UNKNOWN_CACHE_LINE_SIZE) {
icache_line_size_ = cpu.icache_line_size();
}
#elif V8_OS_AIX
// Assume FP support and the default cache line size
supported_ |= (1u << FPU);
......
......@@ -25,7 +25,7 @@ void CpuFeatures::FlushICache(void* buffer, size_t size) {
return;
}
const int kCacheLineSize = CpuFeatures::cache_line_size();
const int kCacheLineSize = CpuFeatures::icache_line_size();
intptr_t mask = kCacheLineSize - 1;
byte *start =
reinterpret_cast<byte *>(reinterpret_cast<intptr_t>(buffer) & ~mask);
......
......@@ -3173,41 +3173,6 @@ void MacroAssembler::CallCFunctionHelper(Register function,
}
// Emits a cache flush sequence for the |size| bytes starting at |address|.
// |scratch| is overwritten when the range has to be checked for a second
// cache line. |size| must be non-zero and no larger than one cache line
// (enforced by the DCHECK below).
void MacroAssembler::FlushICache(Register address, size_t size,
Register scratch) {
// With the coherency feature only the instruction-side sequence
// (sync / icbi / isync) is issued for |address|; no dcbf is emitted.
if (CpuFeatures::IsSupported(INSTR_AND_DATA_CACHE_COHERENCY)) {
sync();
icbi(r0, address);
isync();
return;
}
Label done;
// Flush the cache line containing |address|: data side first (dcbf), then
// the instruction side (icbi), with sync / isync in between and after.
dcbf(r0, address);
sync();
icbi(r0, address);
isync();
// This code handles ranges which cross a single cacheline boundary.
// scratch is last cacheline which intersects range.
const int kCacheLineSizeLog2 = WhichPowerOf2(CpuFeatures::cache_line_size());
DCHECK(size > 0 && size <= (size_t)(1 << kCacheLineSizeLog2));
// scratch = address of the last byte in the range, then rounded down by
// clearing its low kCacheLineSizeLog2 bits (presumably what ClearRightImm
// does -- confirm against its definition).
addi(scratch, address, Operand(size - 1));
ClearRightImm(scratch, scratch, Operand(kCacheLineSizeLog2));
// Skip the second flush when scratch <= address (unsigned compare).
cmpl(scratch, address);
ble(&done);
// Repeat the same flush sequence for the second cache line.
dcbf(r0, scratch);
sync();
icbi(r0, scratch);
isync();
bind(&done);
}
void MacroAssembler::DecodeConstantPoolOffset(Register result,
Register location) {
Label overflow_access, done;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment