ARM: Implement memcpy using NEON.

Add support for a few NEON and ARM SIMD instructions and use them for various
memcpy operations.

BUG=none
TEST=none

Review URL: https://chromiumcodereview.appspot.com/17858002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@15602 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent c04a72e7
This diff is collapsed.
...@@ -78,12 +78,15 @@ class CpuFeatures : public AllStatic { ...@@ -78,12 +78,15 @@ class CpuFeatures : public AllStatic {
(!Serializer::enabled() || !IsFoundByRuntimeProbingOnly(f))); (!Serializer::enabled() || !IsFoundByRuntimeProbingOnly(f)));
} }
static unsigned cache_line_size() { return cache_line_size_; }
private: private:
#ifdef DEBUG #ifdef DEBUG
static bool initialized_; static bool initialized_;
#endif #endif
static unsigned supported_; static unsigned supported_;
static unsigned found_by_runtime_probing_only_; static unsigned found_by_runtime_probing_only_;
static unsigned cache_line_size_;
friend class ExternalReference; friend class ExternalReference;
DISALLOW_COPY_AND_ASSIGN(CpuFeatures); DISALLOW_COPY_AND_ASSIGN(CpuFeatures);
...@@ -301,6 +304,36 @@ struct DwVfpRegister { ...@@ -301,6 +304,36 @@ struct DwVfpRegister {
typedef DwVfpRegister DoubleRegister; typedef DwVfpRegister DoubleRegister;
// Quad word NEON register (q0 to q15).
struct QwNeonRegister {
  static const int kMaxNumRegisters = 16;

  // Builds a register from its number; no range check is done here, use
  // is_valid() to test the result.
  static QwNeonRegister from_code(int code) {
    QwNeonRegister r = { code };
    return r;
  }

  // True when the register number lies in [0, kMaxNumRegisters).
  bool is_valid() const {
    return (0 <= code_) && (code_ < kMaxNumRegisters);
  }
  bool is(QwNeonRegister reg) const { return code_ == reg.code_; }
  // Returns the register number (the n of qn).
  int code() const {
    ASSERT(is_valid());
    return code_;
  }
  // Splits the register into the 4-bit field (*vm) and the extra high bit
  // (*m) used by instruction encodings.
  void split_code(int* vm, int* m) const {
    ASSERT(is_valid());
    // NEON instructions encode Qn as the number of its first double word
    // register, D(2n). Doubling the code before splitting makes q8 to q15
    // set the high bit; splitting the raw code left it always zero and
    // encoded every register other than q0 incorrectly.
    int encoded_code = code_ << 1;
    *m = (encoded_code & 0x10) >> 4;
    *vm = encoded_code & 0x0F;
  }

  int code_;
};
typedef QwNeonRegister QuadRegister;
// Support for the VFP registers s0 to s31 (d0 to d15). // Support for the VFP registers s0 to s31 (d0 to d15).
// Note that "s(N):s(N+1)" is the same as "d(N/2)". // Note that "s(N):s(N+1)" is the same as "d(N/2)".
const SwVfpRegister s0 = { 0 }; const SwVfpRegister s0 = { 0 };
...@@ -370,6 +403,23 @@ const DwVfpRegister d29 = { 29 }; ...@@ -370,6 +403,23 @@ const DwVfpRegister d29 = { 29 };
const DwVfpRegister d30 = { 30 }; const DwVfpRegister d30 = { 30 };
const DwVfpRegister d31 = { 31 }; const DwVfpRegister d31 = { 31 };
// Quad word NEON registers q0 to q15; the initializer is the register number.
const QwNeonRegister q0 = { 0 };
const QwNeonRegister q1 = { 1 };
const QwNeonRegister q2 = { 2 };
const QwNeonRegister q3 = { 3 };
const QwNeonRegister q4 = { 4 };
const QwNeonRegister q5 = { 5 };
const QwNeonRegister q6 = { 6 };
const QwNeonRegister q7 = { 7 };
const QwNeonRegister q8 = { 8 };
const QwNeonRegister q9 = { 9 };
const QwNeonRegister q10 = { 10 };
const QwNeonRegister q11 = { 11 };
const QwNeonRegister q12 = { 12 };
const QwNeonRegister q13 = { 13 };
const QwNeonRegister q14 = { 14 };
const QwNeonRegister q15 = { 15 };
// Aliases for double registers. Defined using #define instead of // Aliases for double registers. Defined using #define instead of
// "static const DwVfpRegister&" because Clang complains otherwise when a // "static const DwVfpRegister&" because Clang complains otherwise when a
// compilation unit that includes this header doesn't use the variables. // compilation unit that includes this header doesn't use the variables.
...@@ -562,6 +612,42 @@ class MemOperand BASE_EMBEDDED { ...@@ -562,6 +612,42 @@ class MemOperand BASE_EMBEDDED {
friend class Assembler; friend class Assembler;
}; };
// Class NeonMemOperand represents a memory operand in load and
// store NEON instructions. It holds a base register, an optional
// increment register and an alignment value.
class NeonMemOperand BASE_EMBEDDED {
public:
// [rn {:align}] Offset
// [rn {:align}]! PostIndex
explicit NeonMemOperand(Register rn, AddrMode am = Offset, int align = 0);
// [rn {:align}], rm PostIndex
explicit NeonMemOperand(Register rn, Register rm, int align = 0);
// Base register.
Register rn() const { return rn_; }
// Register increment.
Register rm() const { return rm_; }
// Stored alignment value (set through SetAlignment, defined elsewhere).
int align() const { return align_; }
private:
// Stores the alignment for this operand; the accepted values and their
// mapping are defined with the implementation, outside this header.
void SetAlignment(int align);
Register rn_; // base
Register rm_; // register increment
int align_;
};
// Class NeonListOperand represents a list of NEON registers, described by
// its first double register and a list type.
class NeonListOperand BASE_EMBEDDED {
public:
// base is the first register of the list and registers_count the number of
// registers in it (one by default).
explicit NeonListOperand(DoubleRegister base, int registers_count = 1);
// First register of the list.
DoubleRegister base() const { return base_; }
// List type (see NeonListType).
NeonListType type() const { return type_; }
private:
DoubleRegister base_;
NeonListType type_;
};
extern const Instr kMovLrPc; extern const Instr kMovLrPc;
extern const Instr kLdrPCMask; extern const Instr kLdrPCMask;
extern const Instr kLdrPCPattern; extern const Instr kLdrPCPattern;
...@@ -866,6 +952,19 @@ class Assembler : public AssemblerBase { ...@@ -866,6 +952,19 @@ class Assembler : public AssemblerBase {
void bfi(Register dst, Register src, int lsb, int width, void bfi(Register dst, Register src, int lsb, int width,
Condition cond = al); Condition cond = al);
// Halfword packing and byte extension instructions. These are used by the
// generated 8-bit to 16-bit copy routine when NEON is not available.
// pkhbt/pkhtb: pack halfwords of src1 and the shifted src2 into dst.
void pkhbt(Register dst, Register src1, const Operand& src2,
Condition cond = al);
void pkhtb(Register dst, Register src1, const Operand& src2,
Condition cond = al);
// uxtb: extend a byte of the (optionally rotated) src into dst.
void uxtb(Register dst, const Operand& src, Condition cond = al);
// uxtab: as uxtb, with src1 as an additional source operand
// (presumably added to the extended byte - confirm in the assembler).
void uxtab(Register dst, Register src1, const Operand& src2,
Condition cond = al);
// uxtb16: byte extension producing two halfwords from the rotated src.
void uxtb16(Register dst, const Operand& src, Condition cond = al);
// Status register access instructions // Status register access instructions
void mrs(Register dst, SRegister s, Condition cond = al); void mrs(Register dst, SRegister s, Condition cond = al);
...@@ -887,6 +986,9 @@ class Assembler : public AssemblerBase { ...@@ -887,6 +986,9 @@ class Assembler : public AssemblerBase {
Register src2, Register src2,
const MemOperand& dst, Condition cond = al); const MemOperand& dst, Condition cond = al);
// Preload instructions
// pld takes no condition argument; the generated memcpy routine uses it to
// prefetch source data ahead of the copy.
void pld(const MemOperand& address);
// Load/Store multiple instructions // Load/Store multiple instructions
void ldm(BlockAddrMode am, Register base, RegList dst, Condition cond = al); void ldm(BlockAddrMode am, Register base, RegList dst, Condition cond = al);
void stm(BlockAddrMode am, Register base, RegList src, Condition cond = al); void stm(BlockAddrMode am, Register base, RegList src, Condition cond = al);
...@@ -1097,6 +1199,17 @@ class Assembler : public AssemblerBase { ...@@ -1097,6 +1199,17 @@ class Assembler : public AssemblerBase {
const DwVfpRegister src, const DwVfpRegister src,
const Condition cond = al); const Condition cond = al);
// Support for NEON.
// All these APIs support D0 to D31 and Q0 to Q15.
// vld1: load the registers of the list dst from the memory operand src.
void vld1(NeonSize size,
const NeonListOperand& dst,
const NeonMemOperand& src);
// vst1: store the registers of the list src to the memory operand dst.
void vst1(NeonSize size,
const NeonListOperand& src,
const NeonMemOperand& dst);
// vmovl: move the double register src into the quad register dst; dt
// selects signed or unsigned data and the element size.
void vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src);
// Pseudo instructions // Pseudo instructions
// Different nop operations are used by the code generator to detect certain // Different nop operations are used by the code generator to detect certain
......
...@@ -112,6 +112,252 @@ UnaryMathFunction CreateExpFunction() { ...@@ -112,6 +112,252 @@ UnaryMathFunction CreateExpFunction() {
#endif #endif
} }
#if defined(V8_HOST_ARCH_ARM)
// Generates a machine-code routine that copies bytes from src to dest and
// returns it as a MemCopyUint8Function. The generated code takes dest in r0,
// src in r1 and the byte count in r2.
// Returns |stub| unchanged when running on the simulator, when the serializer
// is enabled, when unaligned accesses are not supported, or when the code
// buffer cannot be allocated.
OS::MemCopyUint8Function CreateMemCopyUint8Function(
OS::MemCopyUint8Function stub) {
#if defined(USE_SIMULATOR)
return stub;
#else
if (Serializer::enabled() || !CpuFeatures::IsSupported(UNALIGNED_ACCESSES)) {
return stub;
}
size_t actual_size;
byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
if (buffer == NULL) return stub;
MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
Register dest = r0;
Register src = r1;
Register chars = r2;
Register temp1 = r3;
Label less_4;
if (CpuFeatures::IsSupported(NEON)) {
// NEON path: dispatch on the size, prefetching the source on the way. An
// additional pld is emitted for every other 32 bytes when the cache line
// size is 32.
Label loop, less_256, less_128, less_64, less_32, _16_or_less, _8_or_less;
Label size_less_than_8;
__ pld(MemOperand(src, 0));
__ cmp(chars, Operand(8));
__ b(lt, &size_less_than_8);
__ cmp(chars, Operand(32));
__ b(lt, &less_32);
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 32));
}
__ cmp(chars, Operand(64));
__ b(lt, &less_64);
__ pld(MemOperand(src, 64));
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 96));
}
__ cmp(chars, Operand(128));
__ b(lt, &less_128);
__ pld(MemOperand(src, 128));
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 160));
}
__ pld(MemOperand(src, 192));
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 224));
}
__ cmp(chars, Operand(256));
__ b(lt, &less_256);
// Main loop: 64 bytes per iteration. The count is lowered by 256 before the
// loop and restored after it, so the loop exits while 192 to 255 bytes are
// still left for the code below.
__ sub(chars, chars, Operand(256));
__ bind(&loop);
__ pld(MemOperand(src, 256));
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 256));
}
__ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
__ sub(chars, chars, Operand(64), SetCC);
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
__ b(ge, &loop);
__ add(chars, chars, Operand(256));
// 128 to 255 bytes left: copy 128 bytes as two 64-byte load/store groups.
__ bind(&less_256);
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
__ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
__ sub(chars, chars, Operand(128));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
__ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
__ cmp(chars, Operand(64));
__ b(lt, &less_64);
// 64 to 127 bytes left: copy 64 bytes.
__ bind(&less_128);
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
__ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
__ sub(chars, chars, Operand(64));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
// Fewer than 64 bytes left: copy 32 bytes if at least 32 remain.
__ bind(&less_64);
__ cmp(chars, Operand(32));
__ b(lt, &less_32);
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ sub(chars, chars, Operand(32));
// Fewer than 32 bytes left: copy 16 bytes if more than 16 remain.
__ bind(&less_32);
__ cmp(chars, Operand(16));
__ b(le, &_16_or_less);
__ vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(src, PostIndex));
__ vst1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex));
__ sub(chars, chars, Operand(16));
// At most 16 bytes left: copy 8 bytes if more than 8 remain.
__ bind(&_16_or_less);
__ cmp(chars, Operand(8));
__ b(le, &_8_or_less);
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex));
__ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest, PostIndex));
__ sub(chars, chars, Operand(8));
// Do a last copy which may overlap with the previous copy (up to 8 bytes).
// src and dest are moved back by (8 - chars) so that this 8-byte copy ends
// exactly at the end of the requested range.
__ bind(&_8_or_less);
__ rsb(chars, chars, Operand(8));
__ sub(src, src, Operand(chars));
__ sub(dest, dest, Operand(chars));
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src));
__ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest));
__ Ret();
// Fewer than 8 bytes in total: copy one word if (chars & ~3) is non-zero,
// then continue with the shared tail at less_4.
__ bind(&size_less_than_8);
__ bic(temp1, chars, Operand(0x3), SetCC);
__ b(&less_4, eq);
__ ldr(temp1, MemOperand(src, 4, PostIndex));
__ str(temp1, MemOperand(dest, 4, PostIndex));
} else {
// Without NEON: copy one word at a time until dest reaches
// dest + (chars & ~3); skipped entirely when that amount is zero.
Register temp2 = ip;
Label loop;
__ bic(temp2, chars, Operand(0x3), SetCC);
__ b(&less_4, eq);
__ add(temp2, dest, temp2);
__ bind(&loop);
__ ldr(temp1, MemOperand(src, 4, PostIndex));
__ str(temp1, MemOperand(dest, 4, PostIndex));
__ cmp(dest, temp2);
__ b(&loop, ne);
}
// Shared tail: the two low bits of chars select a halfword copy and/or a
// byte copy for the remaining 0 to 3 bytes.
__ bind(&less_4);
__ mov(chars, Operand(chars, LSL, 31), SetCC);
// bit0 => Z (ne), bit1 => C (cs)
__ ldrh(temp1, MemOperand(src, 2, PostIndex), cs);
__ strh(temp1, MemOperand(dest, 2, PostIndex), cs);
__ ldrb(temp1, MemOperand(src), ne);
__ strb(temp1, MemOperand(dest), ne);
__ Ret();
// Finalize the code, flush the instruction cache for the buffer and hand
// the buffer to OS::ProtectCode before returning it as a function.
CodeDesc desc;
masm.GetCode(&desc);
ASSERT(!RelocInfo::RequiresRelocation(desc));
CPU::FlushICache(buffer, actual_size);
OS::ProtectCode(buffer, actual_size);
return FUNCTION_CAST<OS::MemCopyUint8Function>(buffer);
#endif
}
// Convert 8 to 16. The number of characters to copy must be at least 8.
// Generates a machine-code routine that copies 8-bit characters from src into
// 16-bit characters at dest, using unsigned extension. The generated code
// takes dest in r0, src in r1 and the character count in r2.
// Returns |stub| unchanged when running on the simulator, when the serializer
// is enabled, when unaligned accesses are not supported, or when the code
// buffer cannot be allocated.
OS::MemCopyUint16Uint8Function CreateMemCopyUint16Uint8Function(
OS::MemCopyUint16Uint8Function stub) {
#if defined(USE_SIMULATOR)
return stub;
#else
if (Serializer::enabled() || !CpuFeatures::IsSupported(UNALIGNED_ACCESSES)) {
return stub;
}
size_t actual_size;
byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
if (buffer == NULL) return stub;
MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
Register dest = r0;
Register src = r1;
Register chars = r2;
if (CpuFeatures::IsSupported(NEON)) {
Register temp = r3;
Label loop;
// temp = dest + 2 * (chars & ~7) is where the 8-character loop stops;
// chars keeps only the remainder (chars & 7).
__ bic(temp, chars, Operand(0x7));
__ sub(chars, chars, Operand(temp));
__ add(temp, dest, Operand(temp, LSL, 1));
// Each iteration loads 8 bytes into d0, widens them into q0 and stores the
// resulting 16 bytes. The loop ends only when dest equals temp, hence the
// requirement of at least 8 characters.
__ bind(&loop);
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex));
__ vmovl(NeonU8, q0, d0);
__ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex));
__ cmp(dest, temp);
__ b(&loop, ne);
// Do a last copy which will overlap with the previous copy (1 to 8 bytes).
// src is moved back by (8 - chars) bytes and dest by twice that amount.
__ rsb(chars, chars, Operand(8));
__ sub(src, src, Operand(chars));
__ sub(dest, dest, Operand(chars, LSL, 1));
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src));
__ vmovl(NeonU8, q0, d0);
__ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest));
__ Ret();
} else {
Register temp1 = r3;
Register temp2 = ip;
Register temp3 = lr;
Register temp4 = r4;
Label loop;
Label not_two;
// lr and r4 serve as scratch registers, so they are saved here; the final
// Pop restores r4 and loads the saved lr into pc to return.
__ Push(lr, r4);
// temp2 = dest + 2 * (chars & ~3) is where the 4-character loop stops.
__ bic(temp2, chars, Operand(0x3));
__ add(temp2, dest, Operand(temp2, LSL, 1));
// Each iteration reads one word (4 characters) and writes two words
// (4 halfwords): uxtb16 extends alternate bytes, pkhbt/pkhtb pair them up.
__ bind(&loop);
__ ldr(temp1, MemOperand(src, 4, PostIndex));
__ uxtb16(temp3, Operand(temp1, ROR, 0));
__ uxtb16(temp4, Operand(temp1, ROR, 8));
__ pkhbt(temp1, temp3, Operand(temp4, LSL, 16));
__ str(temp1, MemOperand(dest));
__ pkhtb(temp1, temp4, Operand(temp3, ASR, 16));
__ str(temp1, MemOperand(dest, 4));
__ add(dest, dest, Operand(8));
__ cmp(dest, temp2);
__ b(&loop, ne);
// Tail: bit1 of chars selects a two-character copy, bit0 a final
// one-character copy.
__ mov(chars, Operand(chars, LSL, 31), SetCC); // bit0 => ne, bit1 => cs
__ b(&not_two, cc);
__ ldrh(temp1, MemOperand(src, 2, PostIndex));
__ uxtb(temp3, Operand(temp1, ROR, 8));
__ mov(temp3, Operand(temp3, LSL, 16));
__ uxtab(temp3, temp3, Operand(temp1, ROR, 0));
__ str(temp3, MemOperand(dest, 4, PostIndex));
__ bind(&not_two);
__ ldrb(temp1, MemOperand(src), ne);
__ strh(temp1, MemOperand(dest), ne);
__ Pop(pc, r4);
}
// Finalize the code, flush the instruction cache for the buffer and hand
// the buffer to OS::ProtectCode before returning it as a function.
CodeDesc desc;
masm.GetCode(&desc);
CPU::FlushICache(buffer, actual_size);
OS::ProtectCode(buffer, actual_size);
return FUNCTION_CAST<OS::MemCopyUint16Uint8Function>(buffer);
#endif
}
#endif
#undef __ #undef __
......
...@@ -33,22 +33,6 @@ ...@@ -33,22 +33,6 @@
#error ARM EABI support is required. #error ARM EABI support is required.
#endif #endif
#if defined(__ARM_ARCH_7A__) || \
defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7__)
# define CAN_USE_ARMV7_INSTRUCTIONS 1
#ifndef CAN_USE_VFP3_INSTRUCTIONS
# define CAN_USE_VFP3_INSTRUCTIONS
#endif
#endif
// Simulator should support unaligned access by default.
#if !defined(__arm__)
# ifndef CAN_USE_UNALIGNED_ACCESSES
# define CAN_USE_UNALIGNED_ACCESSES 1
# endif
#endif
namespace v8 { namespace v8 {
namespace internal { namespace internal {
...@@ -331,6 +315,32 @@ enum LFlag { ...@@ -331,6 +315,32 @@ enum LFlag {
}; };
// NEON data type: the U (unsigned) flag in bit 24 combined with the imm3
// element-size value in the low three bits.
enum NeonDataType {
  // Masks that isolate the two components of a data type.
  NeonDataTypeSizeMask = 0x7,
  NeonDataTypeUMask = 1 << 24,
  // Signed types (U = 0).
  NeonS8 = 0x1,   // imm3 = 0b001
  NeonS16 = 0x2,  // imm3 = 0b010
  NeonS32 = 0x4,  // imm3 = 0b100
  // Unsigned types (U = 1) share the imm3 value of their signed counterpart.
  NeonU8 = NeonDataTypeUMask | NeonS8,
  NeonU16 = NeonDataTypeUMask | NeonS16,
  NeonU32 = NeonDataTypeUMask | NeonS32
};
// Value of the 'type' field of vld1/vst1 for each register list length.
enum NeonListType {
  nlt_4 = 0x2,  // four registers
  nlt_3 = 0x6,  // three registers
  nlt_1 = 0x7,  // one register
  nlt_2 = 0xA   // two registers
};
// Element size of a NEON load/store, as the value of the two-bit 'size'
// field (the disassembler reads it from bits 7:6 and prints 8 << size).
enum NeonSize {
  Neon8 = 0x0,
  Neon16 = 0x1,
  Neon32 = 0x2,
  // 64-bit elements are size 0b11. The previous value 0x4 does not fit in the
  // two-bit field and would have set the adjacent bit of the instruction.
  Neon64 = 0x3
};
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Supervisor Call (svc) specific support. // Supervisor Call (svc) specific support.
...@@ -573,6 +583,7 @@ class Instruction { ...@@ -573,6 +583,7 @@ class Instruction {
DECLARE_STATIC_TYPED_ACCESSOR(Condition, ConditionField); DECLARE_STATIC_TYPED_ACCESSOR(Condition, ConditionField);
inline int TypeValue() const { return Bits(27, 25); } inline int TypeValue() const { return Bits(27, 25); }
inline int SpecialValue() const { return Bits(27, 23); }
inline int RnValue() const { return Bits(19, 16); } inline int RnValue() const { return Bits(19, 16); }
DECLARE_STATIC_ACCESSOR(RnValue); DECLARE_STATIC_ACCESSOR(RnValue);
......
...@@ -113,6 +113,8 @@ class Decoder { ...@@ -113,6 +113,8 @@ class Decoder {
// Handle formatting of instructions and their options. // Handle formatting of instructions and their options.
int FormatRegister(Instruction* instr, const char* option); int FormatRegister(Instruction* instr, const char* option);
void FormatNeonList(int Vd, int type);
void FormatNeonMemory(int Rn, int align, int Rm);
int FormatOption(Instruction* instr, const char* option); int FormatOption(Instruction* instr, const char* option);
void Format(Instruction* instr, const char* format); void Format(Instruction* instr, const char* format);
void Unknown(Instruction* instr); void Unknown(Instruction* instr);
...@@ -133,6 +135,8 @@ class Decoder { ...@@ -133,6 +135,8 @@ class Decoder {
void DecodeTypeVFP(Instruction* instr); void DecodeTypeVFP(Instruction* instr);
void DecodeType6CoprocessorIns(Instruction* instr); void DecodeType6CoprocessorIns(Instruction* instr);
void DecodeSpecialCondition(Instruction* instr);
void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr); void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr);
void DecodeVCMP(Instruction* instr); void DecodeVCMP(Instruction* instr);
void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr); void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr);
...@@ -419,6 +423,41 @@ int Decoder::FormatVFPinstruction(Instruction* instr, const char* format) { ...@@ -419,6 +423,41 @@ int Decoder::FormatVFPinstruction(Instruction* instr, const char* format) {
} }
// Appends a NEON register list of consecutive double registers starting at
// d<Vd> to the output buffer. |type| is the raw 'type' field of the
// instruction (a NeonListType value); other values append nothing.
void Decoder::FormatNeonList(int Vd, int type) {
  switch (type) {
    case nlt_1:
      out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                      "{d%d}", Vd);
      break;
    case nlt_2:
      out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                      "{d%d, d%d}", Vd, Vd + 1);
      break;
    case nlt_3:
      out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                      "{d%d, d%d, d%d}", Vd, Vd + 1, Vd + 2);
      break;
    case nlt_4:
      out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                      "{d%d, d%d, d%d, d%d}",
                                      Vd, Vd + 1, Vd + 2, Vd + 3);
      break;
    default:
      // Not a list type handled here: print nothing.
      break;
  }
}
// Appends the memory operand of a NEON load/store to the output buffer:
//   [rn{:align}]        when Rm is 15 (no writeback)
//   [rn{:align}]!       when Rm is 13 (writeback)
//   [rn{:align}], rm    otherwise (register increment)
// |align| is the raw two-bit alignment field; 0 means no alignment qualifier.
void Decoder::FormatNeonMemory(int Rn, int align, int Rm) {
  out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                  "[r%d", Rn);
  if (align != 0) {
    // Field values 1, 2 and 3 denote 64-, 128- and 256-bit alignment, i.e.
    // 32 << align. The previous shift by 6 printed twice the real value.
    out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                    ":%d", (1 << align) << 5);
  }
  if (Rm == 15) {
    Print("]");
  } else if (Rm == 13) {
    Print("]!");
  } else {
    out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                    "], r%d", Rm);
  }
}
// Print the movw or movt instruction. // Print the movw or movt instruction.
void Decoder::PrintMovwMovt(Instruction* instr) { void Decoder::PrintMovwMovt(Instruction* instr) {
int imm = instr->ImmedMovwMovtValue(); int imm = instr->ImmedMovwMovtValue();
...@@ -982,15 +1021,107 @@ void Decoder::DecodeType3(Instruction* instr) { ...@@ -982,15 +1021,107 @@ void Decoder::DecodeType3(Instruction* instr) {
break; break;
} }
case ia_x: { case ia_x: {
if (instr->HasW()) { if (instr->Bit(4) == 0) {
VERIFY(instr->Bits(5, 4) == 0x1); Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm");
if (instr->Bit(22) == 0x1) { } else {
if (instr->Bit(5) == 0) {
switch (instr->Bits(22, 21)) {
case 0:
if (instr->Bit(20) == 0) {
if (instr->Bit(6) == 0) {
Format(instr, "pkhbt'cond 'rd, 'rn, 'rm, lsl #'imm05@07");
} else {
if (instr->Bits(11, 7) == 0) {
Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #32");
} else {
Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #'imm05@07");
}
}
} else {
UNREACHABLE();
}
break;
case 1:
UNREACHABLE();
break;
case 2:
UNREACHABLE();
break;
case 3:
Format(instr, "usat 'rd, #'imm05@16, 'rm'shift_sat"); Format(instr, "usat 'rd, #'imm05@16, 'rm'shift_sat");
break;
}
} else { } else {
UNREACHABLE(); // SSAT. switch (instr->Bits(22, 21)) {
case 0:
UNREACHABLE();
break;
case 1:
UNREACHABLE();
break;
case 2:
if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) {
if (instr->Bits(19, 16) == 0xF) {
switch (instr->Bits(11, 10)) {
case 0:
Format(instr, "uxtb16'cond 'rd, 'rm, ror #0");
break;
case 1:
Format(instr, "uxtb16'cond 'rd, 'rm, ror #8");
break;
case 2:
Format(instr, "uxtb16'cond 'rd, 'rm, ror #16");
break;
case 3:
Format(instr, "uxtb16'cond 'rd, 'rm, ror #24");
break;
} }
} else { } else {
Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm"); UNREACHABLE();
}
} else {
UNREACHABLE();
}
break;
case 3:
if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) {
if (instr->Bits(19, 16) == 0xF) {
switch (instr->Bits(11, 10)) {
case 0:
Format(instr, "uxtb'cond 'rd, 'rm, ror #0");
break;
case 1:
Format(instr, "uxtb'cond 'rd, 'rm, ror #8");
break;
case 2:
Format(instr, "uxtb'cond 'rd, 'rm, ror #16");
break;
case 3:
Format(instr, "uxtb'cond 'rd, 'rm, ror #24");
break;
}
} else {
switch (instr->Bits(11, 10)) {
case 0:
Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #0");
break;
case 1:
Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #8");
break;
case 2:
Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #16");
break;
case 3:
Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #24");
break;
}
}
} else {
UNREACHABLE();
}
break;
}
}
} }
break; break;
} }
...@@ -1423,6 +1554,91 @@ void Decoder::DecodeType6CoprocessorIns(Instruction* instr) { ...@@ -1423,6 +1554,91 @@ void Decoder::DecodeType6CoprocessorIns(Instruction* instr) {
} }
} }
// Disassembles an instruction whose condition field is kSpecialCondition.
// Recognized encodings are vmovl (signed and unsigned), vst1, vld1 and pld;
// anything else is reported through Unknown().
void Decoder::DecodeSpecialCondition(Instruction* instr) {
  switch (instr->SpecialValue()) {
    case 5:
      if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
          (instr->Bit(4) == 1)) {
        // vmovl signed
        // The D:Vd field holds the number of the first double register of the
        // destination, so the quad register number is half of it. Printing
        // the raw field showed q(2n) for every destination qn.
        int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
        int Vm = (instr->Bit(5) << 4) | instr->VmValue();
        int imm3 = instr->Bits(21, 19);
        out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                        "vmovl.s%d q%d, d%d", imm3*8, Vd, Vm);
      } else {
        Unknown(instr);
      }
      break;
    case 7:
      if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
          (instr->Bit(4) == 1)) {
        // vmovl unsigned
        // Same destination decoding as the signed form: Qd = D:Vd / 2.
        int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
        int Vm = (instr->Bit(5) << 4) | instr->VmValue();
        int imm3 = instr->Bits(21, 19);
        out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                        "vmovl.u%d q%d, d%d", imm3*8, Vd, Vm);
      } else {
        Unknown(instr);
      }
      break;
    case 8:
      if (instr->Bits(21, 20) == 0) {
        // vst1
        int Vd = (instr->Bit(22) << 4) | instr->VdValue();
        int Rn = instr->VnValue();
        int type = instr->Bits(11, 8);
        int size = instr->Bits(7, 6);
        int align = instr->Bits(5, 4);
        int Rm = instr->VmValue();
        // The element size in bits is 8 << size.
        out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                        "vst1.%d ", (1 << size) << 3);
        FormatNeonList(Vd, type);
        Print(", ");
        FormatNeonMemory(Rn, align, Rm);
      } else if (instr->Bits(21, 20) == 2) {
        // vld1
        int Vd = (instr->Bit(22) << 4) | instr->VdValue();
        int Rn = instr->VnValue();
        int type = instr->Bits(11, 8);
        int size = instr->Bits(7, 6);
        int align = instr->Bits(5, 4);
        int Rm = instr->VmValue();
        out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                        "vld1.%d ", (1 << size) << 3);
        FormatNeonList(Vd, type);
        Print(", ");
        FormatNeonMemory(Rn, align, Rm);
      } else {
        Unknown(instr);
      }
      break;
    case 0xA:
    case 0xB:
      if ((instr->Bits(22, 20) == 5) && (instr->Bits(15, 12) == 0xf)) {
        // pld with a 12-bit immediate offset; bit 23 selects add or subtract.
        int Rn = instr->Bits(19, 16);
        int offset = instr->Bits(11, 0);
        if (offset == 0) {
          out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                          "pld [r%d]", Rn);
        } else if (instr->Bit(23) == 0) {
          out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                          "pld [r%d, #-%d]", Rn, offset);
        } else {
          out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                          "pld [r%d, #+%d]", Rn, offset);
        }
      } else {
        Unknown(instr);
      }
      break;
    default:
      Unknown(instr);
      break;
  }
}
#undef VERIFIY #undef VERIFIY
bool Decoder::IsConstantPoolAt(byte* instr_ptr) { bool Decoder::IsConstantPoolAt(byte* instr_ptr) {
...@@ -1449,7 +1665,7 @@ int Decoder::InstructionDecode(byte* instr_ptr) { ...@@ -1449,7 +1665,7 @@ int Decoder::InstructionDecode(byte* instr_ptr) {
"%08x ", "%08x ",
instr->InstructionBits()); instr->InstructionBits());
if (instr->ConditionField() == kSpecialCondition) { if (instr->ConditionField() == kSpecialCondition) {
Unknown(instr); DecodeSpecialCondition(instr);
return Instruction::kInstrSize; return Instruction::kInstrSize;
} }
int instruction_bits = *(reinterpret_cast<int*>(instr_ptr)); int instruction_bits = *(reinterpret_cast<int*>(instr_ptr));
......
This diff is collapsed.
...@@ -144,7 +144,10 @@ class Simulator { ...@@ -144,7 +144,10 @@ class Simulator {
d8, d9, d10, d11, d12, d13, d14, d15, d8, d9, d10, d11, d12, d13, d14, d15,
d16, d17, d18, d19, d20, d21, d22, d23, d16, d17, d18, d19, d20, d21, d22, d23,
d24, d25, d26, d27, d28, d29, d30, d31, d24, d25, d26, d27, d28, d29, d30, d31,
num_d_registers = 32 num_d_registers = 32,
q0 = 0, q1, q2, q3, q4, q5, q6, q7,
q8, q9, q10, q11, q12, q13, q14, q15,
num_q_registers = 16
}; };
explicit Simulator(Isolate* isolate); explicit Simulator(Isolate* isolate);
...@@ -163,6 +166,15 @@ class Simulator { ...@@ -163,6 +166,15 @@ class Simulator {
void set_dw_register(int dreg, const int* dbl); void set_dw_register(int dreg, const int* dbl);
// Support for VFP. // Support for VFP.
// Raw accessors for the double word (d) and quad word (q) registers. Each
// getter writes the register contents to |value| and each setter reads them
// from |value|; overloads exist for 64-bit and 32-bit words.
// NOTE(review): presumably |value| must hold one full register (one or two
// words for a d register, two or four for a q register) - confirm against
// the definitions in the simulator source.
void get_d_register(int dreg, uint64_t* value);
void set_d_register(int dreg, const uint64_t* value);
void get_d_register(int dreg, uint32_t* value);
void set_d_register(int dreg, const uint32_t* value);
void get_q_register(int qreg, uint64_t* value);
void set_q_register(int qreg, const uint64_t* value);
void get_q_register(int qreg, uint32_t* value);
void set_q_register(int qreg, const uint32_t* value);
void set_s_register(int reg, unsigned int value); void set_s_register(int reg, unsigned int value);
unsigned int get_s_register(int reg) const; unsigned int get_s_register(int reg) const;
...@@ -328,6 +340,7 @@ class Simulator { ...@@ -328,6 +340,7 @@ class Simulator {
// Support for VFP. // Support for VFP.
void DecodeTypeVFP(Instruction* instr); void DecodeTypeVFP(Instruction* instr);
void DecodeType6CoprocessorIns(Instruction* instr); void DecodeType6CoprocessorIns(Instruction* instr);
void DecodeSpecialCondition(Instruction* instr);
void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr); void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr);
void DecodeVCMP(Instruction* instr); void DecodeVCMP(Instruction* instr);
......
...@@ -348,6 +348,8 @@ DEFINE_bool(enable_vfp3, ENABLE_VFP3_DEFAULT, ...@@ -348,6 +348,8 @@ DEFINE_bool(enable_vfp3, ENABLE_VFP3_DEFAULT,
"enable use of VFP3 instructions if available") "enable use of VFP3 instructions if available")
DEFINE_bool(enable_armv7, ENABLE_ARMV7_DEFAULT, DEFINE_bool(enable_armv7, ENABLE_ARMV7_DEFAULT,
"enable use of ARMv7 instructions if available (ARM only)") "enable use of ARMv7 instructions if available (ARM only)")
DEFINE_bool(enable_neon, true,
"enable use of NEON instructions if available (ARM only)")
DEFINE_bool(enable_sudiv, true, DEFINE_bool(enable_sudiv, true,
"enable use of SDIV and UDIV instructions if available (ARM only)") "enable use of SDIV and UDIV instructions if available (ARM only)")
DEFINE_bool(enable_movw_movt, false, DEFINE_bool(enable_movw_movt, false,
......
...@@ -89,12 +89,6 @@ namespace internal { ...@@ -89,12 +89,6 @@ namespace internal {
#elif defined(__ARMEL__) #elif defined(__ARMEL__)
#define V8_HOST_ARCH_ARM 1 #define V8_HOST_ARCH_ARM 1
#define V8_HOST_ARCH_32_BIT 1 #define V8_HOST_ARCH_32_BIT 1
// Some CPU-OS combinations allow unaligned access on ARM. We assume
// that unaligned accesses are not allowed unless the build system
// defines the CAN_USE_UNALIGNED_ACCESSES macro to be non-zero.
#if CAN_USE_UNALIGNED_ACCESSES
#define V8_HOST_CAN_READ_UNALIGNED 1
#endif
#elif defined(__MIPSEL__) #elif defined(__MIPSEL__)
#define V8_HOST_ARCH_MIPS 1 #define V8_HOST_ARCH_MIPS 1
#define V8_HOST_ARCH_32_BIT 1 #define V8_HOST_ARCH_32_BIT 1
...@@ -102,6 +96,16 @@ namespace internal { ...@@ -102,6 +96,16 @@ namespace internal {
#error Host architecture was not detected as supported by v8 #error Host architecture was not detected as supported by v8
#endif #endif
#if defined(__ARM_ARCH_7A__) || \
defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7__)
# define CAN_USE_ARMV7_INSTRUCTIONS 1
# ifndef CAN_USE_VFP3_INSTRUCTIONS
# define CAN_USE_VFP3_INSTRUCTIONS
# endif
#endif
// Target architecture detection. This may be set externally. If not, detect // Target architecture detection. This may be set externally. If not, detect
// in the same way as the host architecture, that is, target the native // in the same way as the host architecture, that is, target the native
// environment as presented by the compiler. // environment as presented by the compiler.
......
...@@ -146,6 +146,9 @@ bool OS::ArmCpuHasFeature(CpuFeature feature) { ...@@ -146,6 +146,9 @@ bool OS::ArmCpuHasFeature(CpuFeature feature) {
case VFP3: case VFP3:
search_string = "vfpv3"; search_string = "vfpv3";
break; break;
case NEON:
search_string = "neon";
break;
case ARMv7: case ARMv7:
search_string = "ARMv7"; search_string = "ARMv7";
break; break;
...@@ -200,6 +203,36 @@ CpuImplementer OS::GetCpuImplementer() { ...@@ -200,6 +203,36 @@ CpuImplementer OS::GetCpuImplementer() {
} }
// Detects the CPU part by searching the CPU info text for a known
// "CPU part" entry. Only parts of ARM_IMPLEMENTER are recognized; everything
// else yields CPU_UNKNOWN. The result of the first call is cached and
// returned by all later calls, whatever |implementer| they pass.
CpuPart OS::GetCpuPart(CpuImplementer implementer) {
  static bool use_cached_value = false;
  static CpuPart cached_value = CPU_UNKNOWN;
  if (use_cached_value) {
    return cached_value;
  }
  if (implementer == ARM_IMPLEMENTER) {
    if (CPUInfoContainsString("CPU part\t: 0xc0f")) {
      cached_value = CORTEX_A15;
    } else if (CPUInfoContainsString("CPU part\t: 0xc0d") ||
               CPUInfoContainsString("CPU part\t: 0xc0c")) {
      // 0xc0d is the documented Cortex-A12 part number. 0xc0c is what this
      // function matched before and is kept so existing detection results
      // do not change.
      cached_value = CORTEX_A12;
    } else if (CPUInfoContainsString("CPU part\t: 0xc09")) {
      cached_value = CORTEX_A9;
    } else if (CPUInfoContainsString("CPU part\t: 0xc08")) {
      cached_value = CORTEX_A8;
    } else if (CPUInfoContainsString("CPU part\t: 0xc07")) {
      cached_value = CORTEX_A7;
    } else if (CPUInfoContainsString("CPU part\t: 0xc05")) {
      cached_value = CORTEX_A5;
    } else {
      cached_value = CPU_UNKNOWN;
    }
  } else {
    cached_value = CPU_UNKNOWN;
  }
  use_cached_value = true;
  return cached_value;
}
bool OS::ArmUsingHardFloat() { bool OS::ArmUsingHardFloat() {
// GCC versions 4.6 and above define __ARM_PCS or __ARM_PCS_VFP to specify // GCC versions 4.6 and above define __ARM_PCS or __ARM_PCS_VFP to specify
// the Floating Point ABI used (PCS stands for Procedure Call Standard). // the Floating Point ABI used (PCS stands for Procedure Call Standard).
......
...@@ -220,6 +220,11 @@ CpuImplementer OS::GetCpuImplementer() { ...@@ -220,6 +220,11 @@ CpuImplementer OS::GetCpuImplementer() {
} }
// CPU part detection is not implemented in this platform file; like the
// neighbouring ArmCpuHasFeature, the function only reports UNIMPLEMENTED().
CpuPart OS::GetCpuPart(CpuImplementer implementer) {
UNIMPLEMENTED();
}
bool OS::ArmCpuHasFeature(CpuFeature feature) { bool OS::ArmCpuHasFeature(CpuFeature feature) {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
......
...@@ -341,7 +341,26 @@ void OS::MemMove(void* dest, const void* src, size_t size) { ...@@ -341,7 +341,26 @@ void OS::MemMove(void* dest, const void* src, size_t size) {
(*memmove_function)(dest, src, size); (*memmove_function)(dest, src, size);
} }
#endif // V8_TARGET_ARCH_IA32 #elif defined(V8_HOST_ARCH_ARM)
// Portable fallback for the 8-bit to 16-bit character copy: stores each of
// the |chars| source bytes into the matching 16-bit destination slot.
void OS::MemCopyUint16Uint8Wrapper(uint16_t* dest,
                                   const uint8_t* src,
                                   size_t chars) {
  for (size_t index = 0; index < chars; ++index) {
    dest[index] = static_cast<uint16_t>(src[index]);
  }
}
// Both copy hooks start out pointing at the portable C++ wrappers;
// POSIXPostSetUp() replaces them with the results of the Create* functions.
OS::MemCopyUint8Function OS::memcopy_uint8_function = &OS::MemCopyUint8Wrapper;
OS::MemCopyUint16Uint8Function OS::memcopy_uint16_uint8_function =
&OS::MemCopyUint16Uint8Wrapper;
// Defined in codegen-arm.cc.
// Each takes the fallback routine and returns either a generated routine or
// that same fallback.
OS::MemCopyUint8Function CreateMemCopyUint8Function(
OS::MemCopyUint8Function stub);
OS::MemCopyUint16Uint8Function CreateMemCopyUint16Uint8Function(
OS::MemCopyUint16Uint8Function stub);
#endif
void POSIXPostSetUp() { void POSIXPostSetUp() {
...@@ -350,6 +369,11 @@ void POSIXPostSetUp() { ...@@ -350,6 +369,11 @@ void POSIXPostSetUp() {
if (generated_memmove != NULL) { if (generated_memmove != NULL) {
memmove_function = generated_memmove; memmove_function = generated_memmove;
} }
#elif defined(V8_HOST_ARCH_ARM)
OS::memcopy_uint8_function =
CreateMemCopyUint8Function(&OS::MemCopyUint8Wrapper);
OS::memcopy_uint16_uint8_function =
CreateMemCopyUint16Uint8Function(&OS::MemCopyUint16Uint8Wrapper);
#endif #endif
init_fast_sin_function(); init_fast_sin_function();
init_fast_cos_function(); init_fast_cos_function();
......
...@@ -315,6 +315,9 @@ class OS { ...@@ -315,6 +315,9 @@ class OS {
// Support runtime detection of Cpu implementer // Support runtime detection of Cpu implementer
static CpuImplementer GetCpuImplementer(); static CpuImplementer GetCpuImplementer();
// Support runtime detection of the Cpu part (the specific core model, e.g.
// CORTEX_A9) for the given implementer. Yields CPU_UNKNOWN when the part is
// not recognized.
static CpuPart GetCpuPart(CpuImplementer implementer);
// Support runtime detection of VFP3 on ARM CPUs. // Support runtime detection of VFP3 on ARM CPUs.
static bool ArmCpuHasFeature(CpuFeature feature); static bool ArmCpuHasFeature(CpuFeature feature);
...@@ -343,7 +346,42 @@ class OS { ...@@ -343,7 +346,42 @@ class OS {
static void MemCopy(void* dest, const void* src, size_t size) { static void MemCopy(void* dest, const void* src, size_t size) {
MemMove(dest, src, size); MemMove(dest, src, size);
} }
#else // V8_TARGET_ARCH_IA32 #elif defined(V8_HOST_ARCH_ARM)
// Signature of a byte copy routine taking destination, source and a count
// (see MemCopyUint8Wrapper for the reference C++ behavior).
typedef void (*MemCopyUint8Function)(uint8_t* dest,
const uint8_t* src,
size_t size);
// Routine that MemCopy() dispatches through.
static MemCopyUint8Function memcopy_uint8_function;
// C++ fallback with the MemCopyUint8Function signature: copies |chars| bytes
// from |src| to |dest| with the C library memcpy, so the two ranges must not
// overlap.
static void MemCopyUint8Wrapper(uint8_t* dest,
const uint8_t* src,
size_t chars) {
memcpy(dest, src, chars);
}
// For values < 16, the assembler function is slower than the inlined C code.
// NOTE(review): the unit is presumably bytes, matching the |size| argument of
// MemCopy() -- confirm against the users of this constant.
static const int kMinComplexMemCopy = 16;
// Copies |size| bytes from |src| to |dest| by calling whichever routine is
// currently stored in memcopy_uint8_function.
static void MemCopy(void* dest, const void* src, size_t size) {
  uint8_t* const to = static_cast<uint8_t*>(dest);
  const uint8_t* const from = static_cast<const uint8_t*>(src);
  memcopy_uint8_function(to, from, size);
}
// Moves |size| bytes from |src| to |dest|. Forwards directly to the C library
// memmove, which permits the two ranges to overlap.
static void MemMove(void* dest, const void* src, size_t size) {
memmove(dest, src, size);
}
// Signature of a widening copy routine: one-byte source elements are written
// as two-byte destination elements (see MemCopyUint16Uint8Wrapper).
typedef void (*MemCopyUint16Uint8Function)(uint16_t* dest,
const uint8_t* src,
size_t size);
// Routine that MemCopyUint16Uint8() dispatches through.
static MemCopyUint16Uint8Function memcopy_uint16_uint8_function;
// Out-of-line C++ fallback; |chars| is the number of elements to convert.
static void MemCopyUint16Uint8Wrapper(uint16_t* dest,
const uint8_t* src,
size_t chars);
// For values < 12, the assembler function is slower than the inlined C code.
// CopyCharsUnsigned(uint16_t*, const uint8_t*, int) compares its element
// count against this threshold to pick between the dispatched routine and
// the plain C++ loop.
static const int kMinComplexConvertMemCopy = 12;
// Widening copy entry point: forwards |dest|, |src| and |size| unchanged to
// the routine currently stored in memcopy_uint16_uint8_function.
static void MemCopyUint16Uint8(uint16_t* dest,
const uint8_t* src,
size_t size) {
(*memcopy_uint16_uint8_function)(dest, src, size);
}
#else
// Copy memory area to disjoint memory area. // Copy memory area to disjoint memory area.
static void MemCopy(void* dest, const void* src, size_t size) { static void MemCopy(void* dest, const void* src, size_t size) {
memcpy(dest, src, size); memcpy(dest, src, size);
......
...@@ -418,6 +418,17 @@ enum CpuImplementer { ...@@ -418,6 +418,17 @@ enum CpuImplementer {
}; };
// CPU core models that OS::GetCpuPart() can report.
enum CpuPart {
// Fallback used when no known part id is matched.
CPU_UNKNOWN,
CORTEX_A15,
CORTEX_A12,
CORTEX_A9,
// Reported for "CPU part : 0xc08".
CORTEX_A8,
// Reported for "CPU part : 0xc07".
CORTEX_A7,
// Reported for "CPU part : 0xc05".
CORTEX_A5
};
// Feature flags bit positions. They are mostly based on the CPUID spec. // Feature flags bit positions. They are mostly based on the CPUID spec.
// (We assign CPUID itself to one of the currently reserved bits -- // (We assign CPUID itself to one of the currently reserved bits --
// feel free to change this if needed.) // feel free to change this if needed.)
...@@ -434,6 +445,7 @@ enum CpuFeature { SSE4_1 = 32 + 19, // x86 ...@@ -434,6 +445,7 @@ enum CpuFeature { SSE4_1 = 32 + 19, // x86
UNALIGNED_ACCESSES = 4, // ARM UNALIGNED_ACCESSES = 4, // ARM
MOVW_MOVT_IMMEDIATE_LOADS = 5, // ARM MOVW_MOVT_IMMEDIATE_LOADS = 5, // ARM
VFP32DREGS = 6, // ARM VFP32DREGS = 6, // ARM
NEON = 7, // ARM
SAHF = 0, // x86 SAHF = 0, // x86
FPU = 1}; // MIPS FPU = 1}; // MIPS
......
...@@ -317,6 +317,11 @@ template <typename sourcechar, typename sinkchar> ...@@ -317,6 +317,11 @@ template <typename sourcechar, typename sinkchar>
INLINE(static void CopyCharsUnsigned(sinkchar* dest, INLINE(static void CopyCharsUnsigned(sinkchar* dest,
const sourcechar* src, const sourcechar* src,
int chars)); int chars));
// On ARM hosts, non-template overloads are declared for three unsigned
// source/sink width combinations (8->8, 8->16 and 16->16 bit); their
// definitions are guarded by the same V8_HOST_ARCH_ARM check.
#if defined(V8_HOST_ARCH_ARM)
INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars));
#endif
// Copy from ASCII/16bit chars to ASCII/16bit chars. // Copy from ASCII/16bit chars to ASCII/16bit chars.
template <typename sourcechar, typename sinkchar> template <typename sourcechar, typename sinkchar>
...@@ -375,6 +380,105 @@ void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, int chars) { ...@@ -375,6 +380,105 @@ void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, int chars) {
} }
#if defined(V8_HOST_ARCH_ARM)
// ARM overload for one-byte to one-byte character copies.
// Counts 0 through 15 are handled by a switch in which every memcpy call is
// given a literal length (presumably so the compiler can expand each copy
// inline); larger counts are forwarded to OS::MemCopy.
// |chars| is converted to unsigned for the switch, so a negative count does
// not match any case label and ends up in the default branch.
void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
switch (static_cast<unsigned>(chars)) {
case 0:
break;
case 1:
*dest = *src;
break;
case 2:
memcpy(dest, src, 2);
break;
case 3:
memcpy(dest, src, 3);
break;
case 4:
memcpy(dest, src, 4);
break;
case 5:
memcpy(dest, src, 5);
break;
case 6:
memcpy(dest, src, 6);
break;
case 7:
memcpy(dest, src, 7);
break;
case 8:
memcpy(dest, src, 8);
break;
case 9:
memcpy(dest, src, 9);
break;
case 10:
memcpy(dest, src, 10);
break;
case 11:
memcpy(dest, src, 11);
break;
case 12:
memcpy(dest, src, 12);
break;
case 13:
memcpy(dest, src, 13);
break;
case 14:
memcpy(dest, src, 14);
break;
case 15:
memcpy(dest, src, 15);
break;
default:
// 16 or more bytes: use the dispatched copy routine.
OS::MemCopy(dest, src, chars);
break;
}
}
// ARM overload for widening one-byte characters into two-byte characters.
// Short inputs (below OS::kMinComplexConvertMemCopy elements) use the plain
// C++ loop directly; everything else goes through the dispatched routine.
void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars) {
  if (chars < OS::kMinComplexConvertMemCopy) {
    OS::MemCopyUint16Uint8Wrapper(dest, src, chars);
    return;
  }
  OS::MemCopyUint16Uint8(dest, src, chars);
}
// ARM overload for two-byte to two-byte character copies.
// Counts 0 through 7 are handled by a switch; the literal memcpy lengths are
// byte counts, i.e. twice the character count. Larger counts are forwarded
// to OS::MemCopy with the length converted to bytes.
// |chars| is converted to unsigned for the switch, so a negative count does
// not match any case label and ends up in the default branch.
void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
switch (static_cast<unsigned>(chars)) {
case 0:
break;
case 1:
*dest = *src;
break;
case 2:
memcpy(dest, src, 4);
break;
case 3:
memcpy(dest, src, 6);
break;
case 4:
memcpy(dest, src, 8);
break;
case 5:
memcpy(dest, src, 10);
break;
case 6:
memcpy(dest, src, 12);
break;
case 7:
memcpy(dest, src, 14);
break;
default:
// 8 or more characters (16+ bytes): use the dispatched copy routine.
OS::MemCopy(dest, src, chars * sizeof(*dest));
break;
}
}
#endif
class StringBuilder : public SimpleStringBuilder { class StringBuilder : public SimpleStringBuilder {
public: public:
explicit StringBuilder(int size) : SimpleStringBuilder(size) { } explicit StringBuilder(int size) : SimpleStringBuilder(size) { }
......
...@@ -1227,4 +1227,186 @@ TEST(14) { ...@@ -1227,4 +1227,186 @@ TEST(14) {
CHECK_EQ(kArmNanLower32, BitCast<int64_t>(t.div_result) & 0xffffffffu); CHECK_EQ(kArmNanLower32, BitCast<int64_t>(t.div_result) & 0xffffffffu);
} }
TEST(15) {
// Test the Neon instructions (vld1, vst1 and vmovl).
CcTest::InitializeVM();
Isolate* isolate = Isolate::Current();
HandleScope scope(isolate);
// src0..src7 / dst0..dst7: 32-byte block for the plain NEON copy.
// srcA0..srcA1 / dstA0..dstA3: 8 source bytes and the 16 bytes they are
// widened into.
typedef struct {
uint32_t src0;
uint32_t src1;
uint32_t src2;
uint32_t src3;
uint32_t src4;
uint32_t src5;
uint32_t src6;
uint32_t src7;
uint32_t dst0;
uint32_t dst1;
uint32_t dst2;
uint32_t dst3;
uint32_t dst4;
uint32_t dst5;
uint32_t dst6;
uint32_t dst7;
uint32_t srcA0;
uint32_t srcA1;
uint32_t dstA0;
uint32_t dstA1;
uint32_t dstA2;
uint32_t dstA3;
} T;
T t;
// Create a function that accepts &t (in r0), loads the source fields into
// NEON registers and stores the results into the destination fields.
Assembler assm(isolate, NULL, 0);
// Everything below, including the checks, is skipped without NEON support.
if (CpuFeatures::IsSupported(NEON)) {
CpuFeatureScope scope(&assm, NEON);
// Save r4 (used as the address register) and the return address.
__ stm(db_w, sp, r4.bit() | lr.bit());
// Move 32 bytes with neon.
__ add(r4, r0, Operand(OFFSET_OF(T, src0)));
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(r4));
__ add(r4, r0, Operand(OFFSET_OF(T, dst0)));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(r4));
// Widen 8 bytes into 8 halfwords (16 bits each): the zero-extended result
// lands in q0 and is stored as the two registers d0 and d1.
__ add(r4, r0, Operand(OFFSET_OF(T, srcA0)));
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(r4));
__ vmovl(NeonU8, q0, d0);
__ add(r4, r0, Operand(OFFSET_OF(T, dstA0)));
__ vst1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(r4));
// Restore r4 and return by loading the saved return address into pc.
__ ldm(ia_w, sp, r4.bit() | pc.bit());
CodeDesc desc;
assm.GetCode(&desc);
Object* code = isolate->heap()->CreateCode(
desc,
Code::ComputeFlags(Code::STUB),
Handle<Code>())->ToObjectChecked();
CHECK(code->IsCode());
#ifdef DEBUG
Code::cast(code)->Print();
#endif
F3 f = FUNCTION_CAST<F3>(Code::cast(code)->entry());
t.src0 = 0x01020304;
t.src1 = 0x11121314;
t.src2 = 0x21222324;
t.src3 = 0x31323334;
t.src4 = 0x41424344;
t.src5 = 0x51525354;
t.src6 = 0x61626364;
t.src7 = 0x71727374;
t.dst0 = 0;
t.dst1 = 0;
t.dst2 = 0;
t.dst3 = 0;
t.dst4 = 0;
t.dst5 = 0;
t.dst6 = 0;
t.dst7 = 0;
t.srcA0 = 0x41424344;
t.srcA1 = 0x81828384;
t.dstA0 = 0;
t.dstA1 = 0;
t.dstA2 = 0;
t.dstA3 = 0;
Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0);
USE(dummy);
// The 32-byte copy must reproduce the source words unchanged.
CHECK_EQ(0x01020304, t.dst0);
CHECK_EQ(0x11121314, t.dst1);
CHECK_EQ(0x21222324, t.dst2);
CHECK_EQ(0x31323334, t.dst3);
CHECK_EQ(0x41424344, t.dst4);
CHECK_EQ(0x51525354, t.dst5);
CHECK_EQ(0x61626364, t.dst6);
CHECK_EQ(0x71727374, t.dst7);
// Each source byte becomes a zero-extended halfword; the expected values
// assume little-endian layout (srcA0 bytes 44 43 42 41 -> 0044 0043 0042
// 0041, read back as the words 0x00430044 and 0x00410042).
CHECK_EQ(0x00430044, t.dstA0);
CHECK_EQ(0x00410042, t.dstA1);
CHECK_EQ(0x00830084, t.dstA2);
CHECK_EQ(0x00810082, t.dstA3);
}
}
TEST(16) {
// Test the pkh, uxtb, uxtab and uxtb16 instructions.
CcTest::InitializeVM();
Isolate* isolate = Isolate::Current();
HandleScope scope(isolate);
// src0..src2 are the instruction inputs; dst0..dst4 receive one result each.
typedef struct {
uint32_t src0;
uint32_t src1;
uint32_t src2;
uint32_t dst0;
uint32_t dst1;
uint32_t dst2;
uint32_t dst3;
uint32_t dst4;
} T;
T t;
// Create a function that accepts &t (in r0), loads the 32-bit source
// fields, applies one instruction under test per step and stores each
// result into its own destination field.
Assembler assm(isolate, NULL, 0);
// Save r4 and the return address; r4 then holds &t so r0 can be reused.
__ stm(db_w, sp, r4.bit() | lr.bit());
__ mov(r4, Operand(r0));
__ ldr(r0, MemOperand(r4, OFFSET_OF(T, src0)));
__ ldr(r1, MemOperand(r4, OFFSET_OF(T, src1)));
// dst0: bottom halfword of r0 combined with top halfword of (r1 << 8).
__ pkhbt(r2, r0, Operand(r1, LSL, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst0)));
// dst1: top halfword of r0 combined with bottom halfword of (r1 >> 8).
__ pkhtb(r2, r0, Operand(r1, ASR, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst1)));
// dst2: bytes 0 and 2 of (r0 ror 8), each zero-extended to a halfword.
__ uxtb16(r2, Operand(r0, ROR, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst2)));
// dst3: low byte of (r0 ror 8), zero-extended to 32 bits.
__ uxtb(r2, Operand(r0, ROR, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst3)));
// dst4: src2 plus the zero-extended low byte of (r1 ror 8).
__ ldr(r0, MemOperand(r4, OFFSET_OF(T, src2)));
__ uxtab(r2, r0, Operand(r1, ROR, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst4)));
// Restore r4 and return by loading the saved return address into pc.
__ ldm(ia_w, sp, r4.bit() | pc.bit());
CodeDesc desc;
assm.GetCode(&desc);
Object* code = isolate->heap()->CreateCode(
desc,
Code::ComputeFlags(Code::STUB),
Handle<Code>())->ToObjectChecked();
CHECK(code->IsCode());
#ifdef DEBUG
Code::cast(code)->Print();
#endif
F3 f = FUNCTION_CAST<F3>(Code::cast(code)->entry());
t.src0 = 0x01020304;
t.src1 = 0x11121314;
t.src2 = 0x11121300;
t.dst0 = 0;
t.dst1 = 0;
t.dst2 = 0;
t.dst3 = 0;
t.dst4 = 0;
Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0);
USE(dummy);
// 0x0304 | (0x12131400 & 0xffff0000)
CHECK_EQ(0x12130304, t.dst0);
// 0x01020000 | (0x00111213 & 0xffff)
CHECK_EQ(0x01021213, t.dst1);
// 0x01020304 ror 8 = 0x04010203 -> bytes 0x01 and 0x03
CHECK_EQ(0x00010003, t.dst2);
CHECK_EQ(0x00000003, t.dst3);
// 0x11121300 + 0x13 (low byte of 0x11121314 ror 8)
CHECK_EQ(0x11121313, t.dst4);
}
#undef __ #undef __
...@@ -405,6 +405,17 @@ TEST(Type3) { ...@@ -405,6 +405,17 @@ TEST(Type3) {
"e6ff3f94 usat r3, #31, r4, lsl #31"); "e6ff3f94 usat r3, #31, r4, lsl #31");
COMPARE(usat(r8, 0, Operand(r5, ASR, 17)), COMPARE(usat(r8, 0, Operand(r5, ASR, 17)),
"e6e088d5 usat r8, #0, r5, asr #17"); "e6e088d5 usat r8, #0, r5, asr #17");
COMPARE(pkhbt(r3, r4, Operand(r5, LSL, 17)),
"e6843895 pkhbt r3, r4, r5, lsl #17");
COMPARE(pkhtb(r3, r4, Operand(r5, ASR, 17)),
"e68438d5 pkhtb r3, r4, r5, asr #17");
COMPARE(uxtb(r3, Operand(r4, ROR, 8)),
"e6ef3474 uxtb r3, r4, ror #8");
COMPARE(uxtab(r3, r4, Operand(r5, ROR, 8)),
"e6e43475 uxtab r3, r4, r5, ror #8");
COMPARE(uxtb16(r3, Operand(r4, ROR, 8)),
"e6cf3474 uxtb16 r3, r4, ror #8");
} }
VERIFY_RUN(); VERIFY_RUN();
...@@ -662,6 +673,23 @@ TEST(Vfp) { ...@@ -662,6 +673,23 @@ TEST(Vfp) {
} }
// Disassembler round-trip checks for the NEON instructions vld1, vst1 and
// vmovl: each COMPARE assembles one instruction and matches the emitted
// encoding and disassembly text against the expected string.
// The comparisons only run when NEON is supported.
TEST(Neon) {
SET_UP();
if (CpuFeatures::IsSupported(NEON)) {
CpuFeatureScope scope(&assm, NEON);
COMPARE(vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(r1)),
"f421420f vld1.8 {d4, d5, d6, d7}, [r1]");
COMPARE(vst1(Neon16, NeonListOperand(d17, 4), NeonMemOperand(r9)),
"f449124f vst1.16 {d17, d18, d19, d20}, [r9]");
// NOTE(review): the expected encoding below carries 4 in the Vd field
// (bits 15-12). The ARM encoding of a quad register Qn in that field is
// 2*n, which would make q4 encode as 8 (f3888a12). The value here matches
// QwNeonRegister::split_code(), which passes the register code through
// without doubling it -- confirm and fix assembler, disassembler and this
// string together.
COMPARE(vmovl(NeonU8, q4, d2),
"f3884a12 vmovl.u8 q4, d2");
}
VERIFY_RUN();
}
TEST(LoadStore) { TEST(LoadStore) {
SET_UP(); SET_UP();
...@@ -858,6 +886,11 @@ TEST(LoadStore) { ...@@ -858,6 +886,11 @@ TEST(LoadStore) {
"e1eba7ff strd r10, [fp, #+127]!"); "e1eba7ff strd r10, [fp, #+127]!");
COMPARE(strd(ip, sp, MemOperand(sp, -127, PreIndex)), COMPARE(strd(ip, sp, MemOperand(sp, -127, PreIndex)),
"e16dc7ff strd ip, [sp, #-127]!"); "e16dc7ff strd ip, [sp, #-127]!");
COMPARE(pld(MemOperand(r1, 0)),
"f5d1f000 pld [r1]");
COMPARE(pld(MemOperand(r2, 128)),
"f5d2f080 pld [r2, #+128]");
} }
VERIFY_RUN(); VERIFY_RUN();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment