Commit 74492ab2 authored by ulan@chromium.org's avatar ulan@chromium.org

Emit VMLA for multiply-add on ARM

Review URL: https://chromiumcodereview.appspot.com/11293061
Patch from Hans Wennborg <hans@chromium.org>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12958 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent e49a910d
...@@ -2390,6 +2390,20 @@ void Assembler::vmul(const DwVfpRegister dst, ...@@ -2390,6 +2390,20 @@ void Assembler::vmul(const DwVfpRegister dst,
} }
void Assembler::vmla(const DwVfpRegister dst,
const DwVfpRegister src1,
const DwVfpRegister src2,
const Condition cond) {
// Instruction details available in ARM DDI 0406C.b, A8-892.
// cond(31-28) | 11100(27-23) | D=?(22) | 00(21-20) | Vn(19-16) |
// Vd(15-12) | 101(11-9) | sz(8)=1 | N=?(7) | op(6)=0 | M=?(5) | 0(4) |
// Vm(3-0)
unsigned x = (cond | 0x1C*B23 | src1.code()*B16 |
dst.code()*B12 | 0x5*B9 | B8 | src2.code());
emit(x);
}
void Assembler::vdiv(const DwVfpRegister dst, void Assembler::vdiv(const DwVfpRegister dst,
const DwVfpRegister src1, const DwVfpRegister src1,
const DwVfpRegister src2, const DwVfpRegister src2,
......
...@@ -1126,6 +1126,10 @@ class Assembler : public AssemblerBase { ...@@ -1126,6 +1126,10 @@ class Assembler : public AssemblerBase {
const DwVfpRegister src1, const DwVfpRegister src1,
const DwVfpRegister src2, const DwVfpRegister src2,
const Condition cond = al); const Condition cond = al);
void vmla(const DwVfpRegister dst,
const DwVfpRegister src1,
const DwVfpRegister src2,
const Condition cond = al);
void vdiv(const DwVfpRegister dst, void vdiv(const DwVfpRegister dst,
const DwVfpRegister src1, const DwVfpRegister src1,
const DwVfpRegister src2, const DwVfpRegister src2,
......
...@@ -1098,6 +1098,7 @@ int Decoder::DecodeType7(Instruction* instr) { ...@@ -1098,6 +1098,7 @@ int Decoder::DecodeType7(Instruction* instr) {
// Dd = vadd(Dn, Dm) // Dd = vadd(Dn, Dm)
// Dd = vsub(Dn, Dm) // Dd = vsub(Dn, Dm)
// Dd = vmul(Dn, Dm) // Dd = vmul(Dn, Dm)
// Dd = vmla(Dn, Dm)
// Dd = vdiv(Dn, Dm) // Dd = vdiv(Dn, Dm)
// vcmp(Dd, Dm) // vcmp(Dd, Dm)
// vmrs // vmrs
...@@ -1160,6 +1161,12 @@ void Decoder::DecodeTypeVFP(Instruction* instr) { ...@@ -1160,6 +1161,12 @@ void Decoder::DecodeTypeVFP(Instruction* instr) {
} else { } else {
Unknown(instr); // Not used by V8. Unknown(instr); // Not used by V8.
} }
} else if ((instr->Opc1Value() == 0x0) && !(instr->Opc3Value() & 0x1)) {
if (instr->SzValue() == 0x1) {
Format(instr, "vmla.f64'cond 'Dd, 'Dn, 'Dm");
} else {
Unknown(instr); // Not used by V8.
}
} else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) { } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) {
if (instr->SzValue() == 0x1) { if (instr->SzValue() == 0x1) {
Format(instr, "vdiv.f64'cond 'Dd, 'Dn, 'Dm"); Format(instr, "vdiv.f64'cond 'Dd, 'Dn, 'Dm");
......
...@@ -1303,8 +1303,21 @@ LInstruction* LChunkBuilder::DoMul(HMul* instr) { ...@@ -1303,8 +1303,21 @@ LInstruction* LChunkBuilder::DoMul(HMul* instr) {
return DefineAsRegister(mul); return DefineAsRegister(mul);
} else if (instr->representation().IsDouble()) { } else if (instr->representation().IsDouble()) {
return DoArithmeticD(Token::MUL, instr); if (instr->UseCount() == 1 && instr->uses().value()->IsAdd()) {
HAdd* add = HAdd::cast(instr->uses().value());
if (instr == add->left()) {
// This mul is the lhs of an add. The add and mul will be folded
// into a multiply-add.
return NULL;
}
if (instr == add->right() && !add->left()->IsMul()) {
// This mul is the rhs of an add, where the lhs is not another mul.
// The add and mul will be folded into a multiply-add.
return NULL;
}
}
return DoArithmeticD(Token::MUL, instr);
} else { } else {
return DoArithmeticT(Token::MUL, instr); return DoArithmeticT(Token::MUL, instr);
} }
...@@ -1330,6 +1343,13 @@ LInstruction* LChunkBuilder::DoSub(HSub* instr) { ...@@ -1330,6 +1343,13 @@ LInstruction* LChunkBuilder::DoSub(HSub* instr) {
} }
} }
LInstruction* LChunkBuilder::DoMultiplyAdd(HMul* mul, HValue* addend) {
LOperand* multiplier_op = UseRegisterAtStart(mul->left());
LOperand* multiplicand_op = UseRegisterAtStart(mul->right());
LOperand* addend_op = UseRegisterAtStart(addend);
return DefineSameAsFirst(new(zone()) LMultiplyAddD(addend_op, multiplier_op,
multiplicand_op));
}
LInstruction* LChunkBuilder::DoAdd(HAdd* instr) { LInstruction* LChunkBuilder::DoAdd(HAdd* instr) {
if (instr->representation().IsInteger32()) { if (instr->representation().IsInteger32()) {
...@@ -1344,6 +1364,14 @@ LInstruction* LChunkBuilder::DoAdd(HAdd* instr) { ...@@ -1344,6 +1364,14 @@ LInstruction* LChunkBuilder::DoAdd(HAdd* instr) {
} }
return result; return result;
} else if (instr->representation().IsDouble()) { } else if (instr->representation().IsDouble()) {
if (instr->left()->IsMul())
return DoMultiplyAdd(HMul::cast(instr->left()), instr->right());
if (instr->right()->IsMul()) {
ASSERT(!instr->left()->IsMul());
return DoMultiplyAdd(HMul::cast(instr->right()), instr->left());
}
return DoArithmeticD(Token::ADD, instr); return DoArithmeticD(Token::ADD, instr);
} else { } else {
ASSERT(instr->representation().IsTagged()); ASSERT(instr->representation().IsTagged());
......
...@@ -135,6 +135,7 @@ class LCodeGen; ...@@ -135,6 +135,7 @@ class LCodeGen;
V(MathMinMax) \ V(MathMinMax) \
V(ModI) \ V(ModI) \
V(MulI) \ V(MulI) \
V(MultiplyAddD) \
V(NumberTagD) \ V(NumberTagD) \
V(NumberTagI) \ V(NumberTagI) \
V(NumberTagU) \ V(NumberTagU) \
...@@ -621,6 +622,24 @@ class LMulI: public LTemplateInstruction<1, 2, 1> { ...@@ -621,6 +622,24 @@ class LMulI: public LTemplateInstruction<1, 2, 1> {
}; };
// Instruction for computing multiplier * multiplicand + addend.
class LMultiplyAddD: public LTemplateInstruction<1, 3, 0> {
public:
LMultiplyAddD(LOperand* addend, LOperand* multiplier,
LOperand* multiplicand) {
inputs_[0] = addend;
inputs_[1] = multiplier;
inputs_[2] = multiplicand;
}
LOperand* addend() { return inputs_[0]; }
LOperand* multiplier() { return inputs_[1]; }
LOperand* multiplicand() { return inputs_[2]; }
DECLARE_CONCRETE_INSTRUCTION(MultiplyAddD, "multiply-add-d")
};
class LCmpIDAndBranch: public LControlInstruction<2, 0> { class LCmpIDAndBranch: public LControlInstruction<2, 0> {
public: public:
LCmpIDAndBranch(LOperand* left, LOperand* right) { LCmpIDAndBranch(LOperand* left, LOperand* right) {
...@@ -2396,6 +2415,8 @@ class LChunkBuilder BASE_EMBEDDED { ...@@ -2396,6 +2415,8 @@ class LChunkBuilder BASE_EMBEDDED {
HYDROGEN_CONCRETE_INSTRUCTION_LIST(DECLARE_DO) HYDROGEN_CONCRETE_INSTRUCTION_LIST(DECLARE_DO)
#undef DECLARE_DO #undef DECLARE_DO
LInstruction* DoMultiplyAdd(HMul* mul, HValue* addend);
static bool HasMagicNumberForDivisor(int32_t divisor); static bool HasMagicNumberForDivisor(int32_t divisor);
static HValue* SimplifiedDividendForMathFloorOfDiv(HValue* val); static HValue* SimplifiedDividendForMathFloorOfDiv(HValue* val);
static HValue* SimplifiedDivisorForMathFloorOfDiv(HValue* val); static HValue* SimplifiedDivisorForMathFloorOfDiv(HValue* val);
......
...@@ -1297,6 +1297,18 @@ void LCodeGen::DoDivI(LDivI* instr) { ...@@ -1297,6 +1297,18 @@ void LCodeGen::DoDivI(LDivI* instr) {
} }
void LCodeGen::DoMultiplyAddD(LMultiplyAddD* instr) {
DwVfpRegister addend = ToDoubleRegister(instr->addend());
DwVfpRegister multiplier = ToDoubleRegister(instr->multiplier());
DwVfpRegister multiplicand = ToDoubleRegister(instr->multiplicand());
// This is computed in-place.
ASSERT(addend.is(ToDoubleRegister(instr->result())));
__ vmla(addend, multiplier, multiplicand);
}
void LCodeGen::DoMathFloorOfDiv(LMathFloorOfDiv* instr) { void LCodeGen::DoMathFloorOfDiv(LMathFloorOfDiv* instr) {
const Register result = ToRegister(instr->result()); const Register result = ToRegister(instr->result());
const Register left = ToRegister(instr->left()); const Register left = ToRegister(instr->left());
......
...@@ -2778,6 +2778,20 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { ...@@ -2778,6 +2778,20 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
double dm_value = get_double_from_d_register(vm); double dm_value = get_double_from_d_register(vm);
double dd_value = dn_value * dm_value; double dd_value = dn_value * dm_value;
set_d_register_from_double(vd, dd_value); set_d_register_from_double(vd, dd_value);
} else if ((instr->Opc1Value() == 0x0) && !(instr->Opc3Value() & 0x1)) {
// vmla
if (instr->SzValue() != 0x1) {
UNREACHABLE(); // Not used by V8.
}
double dd_value = get_double_from_d_register(vd);
double dn_value = get_double_from_d_register(vn);
double dm_value = get_double_from_d_register(vm);
// Note: we do the mul and add in separate steps to avoid getting a result
// with too high precision.
set_d_register_from_double(vd, dn_value * dm_value);
set_d_register_from_double(vd, get_double_from_d_register(vd) + dd_value);
} else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) { } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) {
// vdiv // vdiv
if (instr->SzValue() != 0x1) { if (instr->SzValue() != 0x1) {
......
...@@ -259,6 +259,8 @@ TEST(4) { ...@@ -259,6 +259,8 @@ TEST(4) {
__ vadd(d5, d6, d7); __ vadd(d5, d6, d7);
__ vstr(d5, r4, OFFSET_OF(T, c)); __ vstr(d5, r4, OFFSET_OF(T, c));
__ vmla(d5, d6, d7);
__ vmov(r2, r3, d5); __ vmov(r2, r3, d5);
__ vmov(d4, r2, r3); __ vmov(d4, r2, r3);
__ vstr(d4, r4, OFFSET_OF(T, b)); __ vstr(d4, r4, OFFSET_OF(T, b));
...@@ -347,7 +349,7 @@ TEST(4) { ...@@ -347,7 +349,7 @@ TEST(4) {
CHECK_EQ(1.0, t.e); CHECK_EQ(1.0, t.e);
CHECK_EQ(1.000000059604644775390625, t.d); CHECK_EQ(1.000000059604644775390625, t.d);
CHECK_EQ(4.25, t.c); CHECK_EQ(4.25, t.c);
CHECK_EQ(4.25, t.b); CHECK_EQ(8.375, t.b);
CHECK_EQ(1.5, t.a); CHECK_EQ(1.5, t.a);
} }
} }
......
...@@ -547,6 +547,11 @@ TEST(Vfp) { ...@@ -547,6 +547,11 @@ TEST(Vfp) {
"ec860a20 vstmia r6, {s0-s31}"); "ec860a20 vstmia r6, {s0-s31}");
COMPARE(vldm(ia, r7, s0, s31), COMPARE(vldm(ia, r7, s0, s31),
"ec970a20 vldmia r7, {s0-s31}"); "ec970a20 vldmia r7, {s0-s31}");
COMPARE(vmla(d2, d1, d0),
"ee012b00 vmla.f64 d2, d1, d0");
COMPARE(vmla(d6, d4, d5, cc),
"3e046b05 vmla.f64cc d6, d4, d5");
} }
VERIFY_RUN(); VERIFY_RUN();
...@@ -753,4 +758,3 @@ TEST(LoadStore) { ...@@ -753,4 +758,3 @@ TEST(LoadStore) {
VERIFY_RUN(); VERIFY_RUN();
} }
// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Flags: --allow-natives-syntax
// Test expressions that can be computed with a multiply-add instruction.
function f(a, b, c) {
return a * b + c;
}
function g(a, b, c) {
return a + b * c;
}
function h(a, b, c, d) {
return a * b + c * d;
}
assertEquals(5, f(1, 2, 3));
assertEquals(5, f(1, 2, 3));
%OptimizeFunctionOnNextCall(f);
assertEquals(5, f(1, 2, 3));
assertEquals("2foo", f(1, 2, "foo"));
assertEquals(5.41, f(1.1, 2.1, 3.1));
assertEquals(5.41, f(1.1, 2.1, 3.1));
%OptimizeFunctionOnNextCall(f);
assertEquals(5.41, f(1.1, 2.1, 3.1));
assertEquals(7, g(1, 2, 3));
assertEquals(7, g(1, 2, 3));
%OptimizeFunctionOnNextCall(g);
assertEquals(7, g(1, 2, 3));
assertEquals(8.36, g(1.1, 2.2, 3.3));
assertEquals(8.36, g(1.1, 2.2, 3.3));
%OptimizeFunctionOnNextCall(g);
assertEquals(8.36, g(1.1, 2.2, 3.3));
assertEquals(14, h(1, 2, 3, 4));
assertEquals(14, h(1, 2, 3, 4));
%OptimizeFunctionOnNextCall(h);
assertEquals(14, h(1, 2, 3, 4));
assertEquals(15.02, h(1.1, 2.1, 3.1, 4.1));
assertEquals(15.02, h(1.1, 2.1, 3.1, 4.1));
%OptimizeFunctionOnNextCall(h);
assertEquals(15.02, h(1.1, 2.1, 3.1, 4.1));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment