Emulate MLS on pre-ARMv6T2. Cleaned up thumbee vs. thumb2 confusion.

This should unbreak things on the Raspberry Pi.

LOG=y
R=bmeurer@chromium.org

Review URL: https://codereview.chromium.org/331803003

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@21835 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 8334faa0
...@@ -84,10 +84,11 @@ void CpuFeatures::ProbeImpl(bool cross_compile) { ...@@ -84,10 +84,11 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
supported_ |= 1u << ARMv7; supported_ |= 1u << ARMv7;
if (FLAG_enable_vfp3) supported_ |= 1u << VFP3; if (FLAG_enable_vfp3) supported_ |= 1u << VFP3;
if (FLAG_enable_neon) supported_ |= 1u << NEON | 1u << VFP32DREGS; if (FLAG_enable_neon) supported_ |= 1u << NEON | 1u << VFP32DREGS;
if (FLAG_enable_sudiv) supported_ |= 1u << SUDIV; if (FLAG_enable_sudiv) supported_ |= 1u << SUDIV;
if (FLAG_enable_movw_movt) supported_ |= 1u << MOVW_MOVT_IMMEDIATE_LOADS; if (FLAG_enable_movw_movt) supported_ |= 1u << MOVW_MOVT_IMMEDIATE_LOADS;
if (FLAG_enable_32dregs) supported_ |= 1u << VFP32DREGS; if (FLAG_enable_32dregs) supported_ |= 1u << VFP32DREGS;
} }
if (FLAG_enable_mls) supported_ |= 1u << MLS;
if (FLAG_enable_unaligned_accesses) supported_ |= 1u << UNALIGNED_ACCESSES; if (FLAG_enable_unaligned_accesses) supported_ |= 1u << UNALIGNED_ACCESSES;
#else // __arm__ #else // __arm__
...@@ -102,6 +103,7 @@ void CpuFeatures::ProbeImpl(bool cross_compile) { ...@@ -102,6 +103,7 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
if (FLAG_enable_neon && cpu.has_neon()) supported_ |= 1u << NEON; if (FLAG_enable_neon && cpu.has_neon()) supported_ |= 1u << NEON;
if (FLAG_enable_sudiv && cpu.has_idiva()) supported_ |= 1u << SUDIV; if (FLAG_enable_sudiv && cpu.has_idiva()) supported_ |= 1u << SUDIV;
if (FLAG_enable_mls && cpu.has_thumb2()) supported_ |= 1u << MLS;
if (cpu.architecture() >= 7) { if (cpu.architecture() >= 7) {
if (FLAG_enable_armv7) supported_ |= 1u << ARMv7; if (FLAG_enable_armv7) supported_ |= 1u << ARMv7;
...@@ -744,7 +746,7 @@ int Assembler::GetCmpImmediateRawImmediate(Instr instr) { ...@@ -744,7 +746,7 @@ int Assembler::GetCmpImmediateRawImmediate(Instr instr) {
// same position. // same position.
int Assembler::target_at(int pos) { int Assembler::target_at(int pos) {
Instr instr = instr_at(pos); Instr instr = instr_at(pos);
if (is_uint24(instr)) { if (is_uint24(instr)) {
// Emitted link to a label, not part of a branch. // Emitted link to a label, not part of a branch.
...@@ -1481,6 +1483,7 @@ void Assembler::mla(Register dst, Register src1, Register src2, Register srcA, ...@@ -1481,6 +1483,7 @@ void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
void Assembler::mls(Register dst, Register src1, Register src2, Register srcA, void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
Condition cond) { Condition cond) {
ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc)); ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc));
ASSERT(IsEnabled(MLS));
emit(cond | B22 | B21 | dst.code()*B16 | srcA.code()*B12 | emit(cond | B22 | B21 | dst.code()*B16 | srcA.code()*B12 |
src2.code()*B8 | B7 | B4 | src1.code()); src2.code()*B8 | B7 | B4 | src1.code());
} }
......
...@@ -1215,7 +1215,7 @@ void LCodeGen::DoModI(LModI* instr) { ...@@ -1215,7 +1215,7 @@ void LCodeGen::DoModI(LModI* instr) {
// mls r3, r3, r2, r1 // mls r3, r3, r2, r1
__ sdiv(result_reg, left_reg, right_reg); __ sdiv(result_reg, left_reg, right_reg);
__ mls(result_reg, result_reg, right_reg, left_reg); __ Mls(result_reg, result_reg, right_reg, left_reg);
// If we care about -0, test if the dividend is <0 and the result is 0. // If we care about -0, test if the dividend is <0 and the result is 0.
if (hmod->CheckFlag(HValue::kBailoutOnMinusZero)) { if (hmod->CheckFlag(HValue::kBailoutOnMinusZero)) {
...@@ -1413,7 +1413,7 @@ void LCodeGen::DoDivI(LDivI* instr) { ...@@ -1413,7 +1413,7 @@ void LCodeGen::DoDivI(LDivI* instr) {
if (!hdiv->CheckFlag(HValue::kAllUsesTruncatingToInt32)) { if (!hdiv->CheckFlag(HValue::kAllUsesTruncatingToInt32)) {
// Compute remainder and deopt if it's not zero. // Compute remainder and deopt if it's not zero.
Register remainder = scratch0(); Register remainder = scratch0();
__ mls(remainder, result, divisor, dividend); __ Mls(remainder, result, divisor, dividend);
__ cmp(remainder, Operand::Zero()); __ cmp(remainder, Operand::Zero());
DeoptimizeIf(ne, instr->environment()); DeoptimizeIf(ne, instr->environment());
} }
...@@ -1588,7 +1588,7 @@ void LCodeGen::DoFlooringDivI(LFlooringDivI* instr) { ...@@ -1588,7 +1588,7 @@ void LCodeGen::DoFlooringDivI(LFlooringDivI* instr) {
Label done; Label done;
Register remainder = scratch0(); Register remainder = scratch0();
__ mls(remainder, result, right, left); __ Mls(remainder, result, right, left);
__ cmp(remainder, Operand::Zero()); __ cmp(remainder, Operand::Zero());
__ b(eq, &done); __ b(eq, &done);
__ eor(remainder, remainder, Operand(right)); __ eor(remainder, remainder, Operand(right));
......
...@@ -254,6 +254,19 @@ void MacroAssembler::Move(DwVfpRegister dst, DwVfpRegister src) { ...@@ -254,6 +254,19 @@ void MacroAssembler::Move(DwVfpRegister dst, DwVfpRegister src) {
} }
// Multiply-and-subtract: dst = srcA - (src1 * src2).
//
// Emits a single mls instruction when the MLS CPU feature is supported.
// Otherwise the operation is emulated with a mul/sub pair that uses ip as a
// scratch register, so ip is clobbered on that path. Both emitted
// instructions carry |cond| and leave the condition flags untouched.
void MacroAssembler::Mls(Register dst, Register src1, Register src2,
                         Register srcA, Condition cond) {
  if (CpuFeatures::IsSupported(MLS)) {
    CpuFeatureScope scope(this, MLS);
    mls(dst, src1, src2, srcA, cond);
  } else {
    // The product is written to ip before srcA is read, so srcA must not
    // live in ip. dst may alias srcA: sub reads srcA in the same instruction
    // that writes dst, exactly as the real mls instruction allows.
    ASSERT(!srcA.is(ip));
    mul(ip, src1, src2, LeaveCC, cond);
    sub(dst, srcA, ip, LeaveCC, cond);
  }
}
void MacroAssembler::And(Register dst, Register src1, const Operand& src2, void MacroAssembler::And(Register dst, Register src1, const Operand& src2,
Condition cond) { Condition cond) {
if (!src2.is_reg() && if (!src2.is_reg() &&
......
...@@ -117,7 +117,8 @@ class MacroAssembler: public Assembler { ...@@ -117,7 +117,8 @@ class MacroAssembler: public Assembler {
Register scratch = no_reg, Register scratch = no_reg,
Condition cond = al); Condition cond = al);
void Mls(Register dst, Register src1, Register src2, Register srcA,
Condition cond = al);
void And(Register dst, Register src1, const Operand& src2, void And(Register dst, Register src1, const Operand& src2,
Condition cond = al); Condition cond = al);
void Ubfx(Register dst, Register src, int lsb, int width, void Ubfx(Register dst, Register src, int lsb, int width,
......
...@@ -259,7 +259,7 @@ CPU::CPU() : stepping_(0), ...@@ -259,7 +259,7 @@ CPU::CPU() : stepping_(0),
has_sse42_(false), has_sse42_(false),
has_idiva_(false), has_idiva_(false),
has_neon_(false), has_neon_(false),
has_thumbee_(false), has_thumb2_(false),
has_vfp_(false), has_vfp_(false),
has_vfp3_(false), has_vfp3_(false),
has_vfp3_d32_(false) { has_vfp3_d32_(false) {
...@@ -383,7 +383,6 @@ CPU::CPU() : stepping_(0), ...@@ -383,7 +383,6 @@ CPU::CPU() : stepping_(0),
if (hwcaps != 0) { if (hwcaps != 0) {
has_idiva_ = (hwcaps & HWCAP_IDIVA) != 0; has_idiva_ = (hwcaps & HWCAP_IDIVA) != 0;
has_neon_ = (hwcaps & HWCAP_NEON) != 0; has_neon_ = (hwcaps & HWCAP_NEON) != 0;
has_thumbee_ = (hwcaps & HWCAP_THUMBEE) != 0;
has_vfp_ = (hwcaps & HWCAP_VFP) != 0; has_vfp_ = (hwcaps & HWCAP_VFP) != 0;
has_vfp3_ = (hwcaps & (HWCAP_VFPv3 | HWCAP_VFPv3D16 | HWCAP_VFPv4)) != 0; has_vfp3_ = (hwcaps & (HWCAP_VFPv3 | HWCAP_VFPv3D16 | HWCAP_VFPv4)) != 0;
has_vfp3_d32_ = (has_vfp3_ && ((hwcaps & HWCAP_VFPv3D16) == 0 || has_vfp3_d32_ = (has_vfp3_ && ((hwcaps & HWCAP_VFPv3D16) == 0 ||
...@@ -393,7 +392,7 @@ CPU::CPU() : stepping_(0), ...@@ -393,7 +392,7 @@ CPU::CPU() : stepping_(0),
char* features = cpu_info.ExtractField("Features"); char* features = cpu_info.ExtractField("Features");
has_idiva_ = HasListItem(features, "idiva"); has_idiva_ = HasListItem(features, "idiva");
has_neon_ = HasListItem(features, "neon"); has_neon_ = HasListItem(features, "neon");
has_thumbee_ = HasListItem(features, "thumbee"); has_thumb2_ = HasListItem(features, "thumb2");
has_vfp_ = HasListItem(features, "vfp"); has_vfp_ = HasListItem(features, "vfp");
if (HasListItem(features, "vfpv3d16")) { if (HasListItem(features, "vfpv3d16")) {
has_vfp3_ = true; has_vfp3_ = true;
...@@ -417,13 +416,13 @@ CPU::CPU() : stepping_(0), ...@@ -417,13 +416,13 @@ CPU::CPU() : stepping_(0),
architecture_ = 7; architecture_ = 7;
} }
// ARMv7 implies ThumbEE. // ARMv7 implies Thumb2.
if (architecture_ >= 7) { if (architecture_ >= 7) {
has_thumbee_ = true; has_thumb2_ = true;
} }
// The earliest architecture with ThumbEE is ARMv6T2. // The earliest architecture with Thumb2 is ARMv6T2.
if (has_thumbee_ && architecture_ < 6) { if (has_thumb2_ && architecture_ < 6) {
architecture_ = 6; architecture_ = 6;
} }
...@@ -435,10 +434,10 @@ CPU::CPU() : stepping_(0), ...@@ -435,10 +434,10 @@ CPU::CPU() : stepping_(0),
uint32_t cpu_flags = SYSPAGE_ENTRY(cpuinfo)->flags; uint32_t cpu_flags = SYSPAGE_ENTRY(cpuinfo)->flags;
if (cpu_flags & ARM_CPU_FLAG_V7) { if (cpu_flags & ARM_CPU_FLAG_V7) {
architecture_ = 7; architecture_ = 7;
has_thumbee_ = true; has_thumb2_ = true;
} else if (cpu_flags & ARM_CPU_FLAG_V6) { } else if (cpu_flags & ARM_CPU_FLAG_V6) {
architecture_ = 6; architecture_ = 6;
// QNX doesn't say if ThumbEE is available. // QNX doesn't say if Thumb2 is available.
// Assume false for the architectures older than ARMv7. // Assume false for the architectures older than ARMv7.
} }
ASSERT(architecture_ >= 6); ASSERT(architecture_ >= 6);
......
...@@ -72,7 +72,7 @@ class CPU V8_FINAL BASE_EMBEDDED { ...@@ -72,7 +72,7 @@ class CPU V8_FINAL BASE_EMBEDDED {
// arm features // arm features
bool has_idiva() const { return has_idiva_; } bool has_idiva() const { return has_idiva_; }
bool has_neon() const { return has_neon_; } bool has_neon() const { return has_neon_; }
bool has_thumbee() const { return has_thumbee_; } bool has_thumb2() const { return has_thumb2_; }
bool has_vfp() const { return has_vfp_; } bool has_vfp() const { return has_vfp_; }
bool has_vfp3() const { return has_vfp3_; } bool has_vfp3() const { return has_vfp3_; }
bool has_vfp3_d32() const { return has_vfp3_d32_; } bool has_vfp3_d32() const { return has_vfp3_d32_; }
...@@ -103,7 +103,7 @@ class CPU V8_FINAL BASE_EMBEDDED { ...@@ -103,7 +103,7 @@ class CPU V8_FINAL BASE_EMBEDDED {
bool has_sse42_; bool has_sse42_;
bool has_idiva_; bool has_idiva_;
bool has_neon_; bool has_neon_;
bool has_thumbee_; bool has_thumb2_;
bool has_vfp_; bool has_vfp_;
bool has_vfp3_; bool has_vfp3_;
bool has_vfp3_d32_; bool has_vfp3_d32_;
......
...@@ -373,6 +373,8 @@ DEFINE_bool(enable_neon, ENABLE_NEON_DEFAULT, ...@@ -373,6 +373,8 @@ DEFINE_bool(enable_neon, ENABLE_NEON_DEFAULT,
"enable use of NEON instructions if available (ARM only)") "enable use of NEON instructions if available (ARM only)")
DEFINE_bool(enable_sudiv, true, DEFINE_bool(enable_sudiv, true,
"enable use of SDIV and UDIV instructions if available (ARM only)") "enable use of SDIV and UDIV instructions if available (ARM only)")
DEFINE_bool(enable_mls, true,
"enable use of MLS instructions if available (ARM only)")
DEFINE_bool(enable_movw_movt, false, DEFINE_bool(enable_movw_movt, false,
"enable loading 32-bit constant by means of movw/movt " "enable loading 32-bit constant by means of movw/movt "
"instruction pairs (ARM only)") "instruction pairs (ARM only)")
......
...@@ -616,6 +616,7 @@ enum CpuFeature { ...@@ -616,6 +616,7 @@ enum CpuFeature {
VFP3, VFP3,
ARMv7, ARMv7,
SUDIV, SUDIV,
MLS,
UNALIGNED_ACCESSES, UNALIGNED_ACCESSES,
MOVW_MOVT_IMMEDIATE_LOADS, MOVW_MOVT_IMMEDIATE_LOADS,
VFP32DREGS, VFP32DREGS,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment