Commit f4fb7025 authored by arajp's avatar arajp Committed by Commit bot

Make FlushICache a NOP for Nvidia Denver CPUs.

Denver supports a coherent cache mechanism, so there is no need to clean
the D cache and invalidate the I cache. MTS has to check the translation
any time there is an I cache invalidate, and this time can be saved by
making FlushICache a NOP.

The patch improves Octane by roughly 3-4% on Denver.

Review URL: https://codereview.chromium.org/797233002

Cr-Commit-Position: refs/heads/master@{#25898}
parent 31c66e2d
......@@ -127,6 +127,11 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
}
if (FLAG_enable_32dregs && cpu.has_vfp3_d32()) supported_ |= 1u << VFP32DREGS;
if (cpu.implementer() == base::CPU::NVIDIA &&
cpu.variant() == base::CPU::NVIDIA_DENVER) {
supported_ |= 1u << COHERENT_CACHE;
}
#endif
DCHECK(!IsSupported(VFP3) || IsSupported(ARMv7));
......@@ -188,14 +193,15 @@ void CpuFeatures::PrintTarget() {
void CpuFeatures::PrintFeatures() {
printf(
"ARMv7=%d VFP3=%d VFP32DREGS=%d NEON=%d SUDIV=%d UNALIGNED_ACCESSES=%d "
"MOVW_MOVT_IMMEDIATE_LOADS=%d",
"MOVW_MOVT_IMMEDIATE_LOADS=%d COHERENT_CACHE=%d",
CpuFeatures::IsSupported(ARMv7),
CpuFeatures::IsSupported(VFP3),
CpuFeatures::IsSupported(VFP32DREGS),
CpuFeatures::IsSupported(NEON),
CpuFeatures::IsSupported(SUDIV),
CpuFeatures::IsSupported(UNALIGNED_ACCESSES),
CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS));
CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS),
CpuFeatures::IsSupported(COHERENT_CACHE));
#ifdef __arm__
bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
#elif USE_EABI_HARDFLOAT
......
......@@ -27,6 +27,8 @@ namespace internal {
void CpuFeatures::FlushICache(void* start, size_t size) {
if (size == 0) return;
if (CpuFeatures::IsSupported(COHERENT_CACHE)) return;
#if defined(USE_SIMULATOR)
// Not generating ARM instructions for C-code. This means that we are
// building an ARM emulator based target. We should notify the simulator
......
......@@ -46,11 +46,25 @@ namespace internal {
// Initializes supported_ for this target. The feature set starts out empty;
// unless cross_compile is true, the host CPU is then probed and the
// COHERENT_CACHE bit is set when it reports the NVIDIA implementer together
// with the NVIDIA_DENVER variant.
void CpuFeatures::ProbeImpl(bool cross_compile) {
// AArch64 has no configuration options, so start from an empty feature set.
supported_ = 0;
// Only use statically determined features for cross compile (snapshot).
if (cross_compile) return;
// Probe for runtime features.
base::CPU cpu;
// Both the implementer and the variant must match before COHERENT_CACHE is
// enabled; FlushICache returns early when this feature is supported.
if (cpu.implementer() == base::CPU::NVIDIA &&
cpu.variant() == base::CPU::NVIDIA_DENVER) {
supported_ |= 1u << COHERENT_CACHE;
}
}
// Empty body: nothing is printed for the target here.
void CpuFeatures::PrintTarget() { }
void CpuFeatures::PrintFeatures() { }
// Prints the COHERENT_CACHE support status to stdout as "COHERENT_CACHE=<d>"
// followed by a newline, where <d> is the IsSupported() result formatted
// with %d.
void CpuFeatures::PrintFeatures() {
printf("COHERENT_CACHE=%d\n", CpuFeatures::IsSupported(COHERENT_CACHE));
}
// -----------------------------------------------------------------------------
......
......@@ -43,6 +43,8 @@ class CacheLineSizes {
void CpuFeatures::FlushICache(void* address, size_t length) {
if (length == 0) return;
if (CpuFeatures::IsSupported(COHERENT_CACHE)) return;
#ifdef USE_SIMULATOR
// TODO(all): consider doing some cache simulation to ensure every address
// run has been synced.
......
......@@ -300,6 +300,7 @@ CPU::CPU()
type_(0),
implementer_(0),
architecture_(0),
variant_(-1),
part_(0),
has_fpu_(false),
has_cmov_(false),
......@@ -388,7 +389,7 @@ CPU::CPU()
// Extract implementor from the "CPU implementer" field.
char* implementer = cpu_info.ExtractField("CPU implementer");
if (implementer != NULL) {
char* end ;
char* end;
implementer_ = strtol(implementer, &end, 0);
if (end == implementer) {
implementer_ = 0;
......@@ -396,10 +397,20 @@ CPU::CPU()
delete[] implementer;
}
char* variant = cpu_info.ExtractField("CPU variant");
if (variant != NULL) {
char* end;
variant_ = strtol(variant, &end, 0);
if (end == variant) {
variant_ = -1;
}
delete[] variant;
}
// Extract part number from the "CPU part" field.
char* part = cpu_info.ExtractField("CPU part");
if (part != NULL) {
char* end ;
char* end;
part_ = strtol(part, &end, 0);
if (end == part) {
part_ = 0;
......@@ -540,7 +551,7 @@ CPU::CPU()
// Extract implementor from the "CPU implementer" field.
char* implementer = cpu_info.ExtractField("CPU implementer");
if (implementer != NULL) {
char* end ;
char* end;
implementer_ = strtol(implementer, &end, 0);
if (end == implementer) {
implementer_ = 0;
......@@ -548,10 +559,20 @@ CPU::CPU()
delete[] implementer;
}
char* variant = cpu_info.ExtractField("CPU variant");
if (variant != NULL) {
char* end;
variant_ = strtol(variant, &end, 0);
if (end == variant) {
variant_ = -1;
}
delete[] variant;
}
// Extract part number from the "CPU part" field.
char* part = cpu_info.ExtractField("CPU part");
if (part != NULL) {
char* end ;
char* end;
part_ = strtol(part, &end, 0);
if (end == part) {
part_ = 0;
......
......@@ -47,6 +47,8 @@ class CPU FINAL {
static const int NVIDIA = 0x4e;
static const int QUALCOMM = 0x51;
int architecture() const { return architecture_; }
int variant() const { return variant_; }
static const int NVIDIA_DENVER = 0x0;
int part() const { return part_; }
static const int ARM_CORTEX_A5 = 0xc05;
static const int ARM_CORTEX_A7 = 0xc07;
......@@ -92,6 +94,7 @@ class CPU FINAL {
int type_;
int implementer_;
int architecture_;
int variant_;
int part_;
bool has_fpu_;
bool has_cmov_;
......
......@@ -609,6 +609,7 @@ enum CpuFeature {
MIPSr6,
// ARM64
ALWAYS_ALIGN_CSP,
COHERENT_CACHE,
NUMBER_OF_CPU_FEATURES
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment