Commit 2857e783 authored by jing.bao, committed by Commit Bot

[ia32][wasm] Add I32x4AddHoriz and I16x8AddHoriz

Add phaddd, phaddw and their AVX versions (vphaddd, vphaddw)
Add vmovdqu and a Movdqu macro

Bug: 
Change-Id: I4f5c0cf96ab481fc18f0a0d554101a996a16c954
Reviewed-on: https://chromium-review.googlesource.com/715677
Commit-Queue: Jing Bao <jing.bao@intel.com>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48621}
parent b0a8e5f1
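
For context, phaddd and phaddw are SSSE3 horizontal adds: each sums adjacent lane pairs, taking the low half of the result from the first operand and the high half from the second. A minimal scalar model of the I32x4AddHoriz semantics wired up below (an illustrative sketch, not code from this change):

    #include <array>
    #include <cstdint>

    // Scalar model of phaddd dst, src: the low two result lanes are the
    // pairwise sums of dst's lanes, the high two the pairwise sums of src's.
    std::array<int32_t, 4> I32x4AddHoriz(const std::array<int32_t, 4>& a,
                                         const std::array<int32_t, 4>& b) {
      return {{a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]}};
    }

I16x8AddHoriz (phaddw) is the same construction over eight 16-bit lanes.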
@@ -1863,6 +1863,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
         __ movss(operand, i.InputDoubleRegister(index));
       }
       break;
+    case kIA32Movdqu:
+      if (instr->HasOutput()) {
+        __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ Movdqu(operand, i.InputSimd128Register(index));
+      }
+      break;
     case kIA32BitcastFI:
       if (instr->InputAt(0)->IsFPStackSlot()) {
         __ mov(i.OutputRegister(), i.InputOperand(0));
@@ -2040,6 +2049,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                 i.InputOperand(1));
       break;
     }
+    case kSSEI32x4AddHoriz: {
+      CpuFeatureScope sse_scope(tasm(), SSSE3);
+      __ phaddd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4AddHoriz: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
     case kSSEI32x4Sub: {
       __ psubd(i.OutputSimd128Register(), i.InputOperand(1));
       break;
@@ -2226,6 +2246,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                i.InputOperand(2), i.InputInt8(1));
       break;
     }
+    case kSSEI16x8AddHoriz: {
+      CpuFeatureScope sse_scope(tasm(), SSSE3);
+      __ phaddw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8AddHoriz: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
     case kIA32I8x16Splat: {
       XMMRegister dst = i.OutputSimd128Register();
       __ Movd(dst, i.InputOperand(0));
...
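
Note the asymmetry in the cases above: the SSE forms name only i.InputOperand(1) because phaddd/phaddw are destructive two-operand instructions, so the register allocator must pin the output to input 0, whereas the three-operand AVX forms take the first source explicitly. A sketch of the selection pattern this relies on, with the helper name and details assumed rather than taken from this diff:

    // Hypothetical helper: how ia32 instruction selection typically splits
    // a SIMD binop between its AVX and SSE encodings.
    void VisitRROSimd(InstructionSelector* selector, Node* node,
                      ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
      IA32OperandGenerator g(selector);
      InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
      InstructionOperand operand1 = g.Use(node->InputAt(1));
      if (selector->IsSupported(AVX)) {
        // vphaddd/vphaddw are three-operand; dst may be any register.
        selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0,
                       operand1);
      } else {
        // phaddd/phaddw overwrite their first operand; force dst == input 0.
        selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0,
                       operand1);
      }
    }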
@@ -103,6 +103,7 @@ namespace compiler {
   V(IA32Movl)               \
   V(IA32Movss)              \
   V(IA32Movsd)              \
+  V(IA32Movdqu)             \
   V(IA32BitcastFI)          \
   V(IA32BitcastIF)          \
   V(IA32Lea)                \
@@ -122,6 +123,8 @@ namespace compiler {
   V(AVXI32x4ShrS)           \
   V(SSEI32x4Add)            \
   V(AVXI32x4Add)            \
+  V(SSEI32x4AddHoriz)       \
+  V(AVXI32x4AddHoriz)       \
   V(SSEI32x4Sub)            \
   V(AVXI32x4Sub)            \
   V(SSEI32x4Mul)            \
@@ -152,6 +155,8 @@ namespace compiler {
   V(IA32I16x8ExtractLane)   \
   V(SSEI16x8ReplaceLane)    \
   V(AVXI16x8ReplaceLane)    \
+  V(SSEI16x8AddHoriz)       \
+  V(AVXI16x8AddHoriz)       \
   V(IA32I8x16Splat)         \
   V(IA32I8x16ExtractLane)   \
   V(SSEI8x16ReplaceLane)    \
...
@@ -108,6 +108,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kAVXI32x4ShrS:
     case kSSEI32x4Add:
     case kAVXI32x4Add:
+    case kSSEI32x4AddHoriz:
+    case kAVXI32x4AddHoriz:
     case kSSEI32x4Sub:
     case kAVXI32x4Sub:
     case kSSEI32x4Mul:
@@ -138,6 +140,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32I16x8ExtractLane:
     case kSSEI16x8ReplaceLane:
     case kAVXI16x8ReplaceLane:
+    case kSSEI16x8AddHoriz:
+    case kAVXI16x8AddHoriz:
     case kIA32I8x16Splat:
     case kIA32I8x16ExtractLane:
     case kSSEI8x16ReplaceLane:
@@ -161,6 +165,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kIA32Movl:
     case kIA32Movss:
     case kIA32Movsd:
+    case kIA32Movdqu:
       // Moves are used for memory load/store operations.
       return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
...
@@ -250,8 +250,10 @@ void InstructionSelector::VisitLoad(Node* node) {
     case MachineRepresentation::kWord32:
       opcode = kIA32Movl;
       break;
+    case MachineRepresentation::kSimd128:
+      opcode = kIA32Movdqu;
+      break;
     case MachineRepresentation::kWord64:   // Fall through.
-    case MachineRepresentation::kSimd128:  // Fall through.
     case MachineRepresentation::kNone:
       UNREACHABLE();
       return;
@@ -340,8 +342,10 @@ void InstructionSelector::VisitStore(Node* node) {
     case MachineRepresentation::kWord32:
       opcode = kIA32Movl;
       break;
+    case MachineRepresentation::kSimd128:
+      opcode = kIA32Movdqu;
+      break;
     case MachineRepresentation::kWord64:   // Fall through.
-    case MachineRepresentation::kSimd128:  // Fall through.
     case MachineRepresentation::kNone:
       UNREACHABLE();
       return;
@@ -1905,6 +1909,7 @@ VISIT_ATOMIC_BINOP(Xor)
 #define SIMD_BINOP_LIST(V) \
   V(I32x4Add)              \
+  V(I32x4AddHoriz)         \
   V(I32x4Sub)              \
   V(I32x4Mul)              \
   V(I32x4MinS)             \
@@ -1916,7 +1921,8 @@ VISIT_ATOMIC_BINOP(Xor)
   V(I32x4MinU)             \
   V(I32x4MaxU)             \
   V(I32x4GtU)              \
-  V(I32x4GeU)
+  V(I32x4GeU)              \
+  V(I16x8AddHoriz)
 #define SIMD_UNOP_LIST(V) V(I32x4Neg)
...
@@ -2150,15 +2150,11 @@ void InstructionSelector::VisitI32x4MinU(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI32x4MaxU(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitI32x4ShrU(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
-        // && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS
-        // && !V8_TARGET_ARCH_MIPS64
-#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
-    !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
-        // && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
+        // && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS
+        // && !V8_TARGET_ARCH_MIPS64
 #if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
     !V8_TARGET_ARCH_MIPS64
@@ -2248,10 +2244,11 @@ void InstructionSelector::VisitI16x8SubSaturateS(Node* node) {
         // && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 #if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
-    !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
+    !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
 void InstructionSelector::VisitI16x8AddHoriz(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
-        // && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
+        // && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS
+        // && !V8_TARGET_ARCH_MIPS64
 #if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
     !V8_TARGET_ARCH_MIPS64
...
@@ -1453,6 +1453,12 @@ class Assembler : public AssemblerBase {
     vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
   }
+  void vmovdqu(XMMRegister dst, const Operand& src) {
+    vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG);
+  }
+  void vmovdqu(const Operand& dst, XMMRegister src) {
+    vinstr(0x7F, src, xmm0, dst, kF3, k0F, kWIG);
+  }
   void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); }
   void vmovd(XMMRegister dst, const Operand& src) {
     vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG);
...
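
The vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG) call requests the VEX.128.F3.0F.WIG 6F /r encoding, with xmm0 filling the unused vvvv slot just as in the neighboring vcvttps2dq and vmovd helpers. Working through the standard VEX rules by hand (an illustrative byte breakdown, not output captured from this change), vmovdqu xmm0, [ebx+ecx*4+10000] should assemble to:

    c5           ; two-byte VEX prefix
    fa           ; R=1, vvvv=1111 (xmm0, unused), L=0 (128-bit), pp=10 (F3)
    6f           ; opcode (6F = load form; the store form uses 7F)
    84 8b        ; ModRM + SIB: reg = xmm0, [ebx + ecx*4 + disp32]
    10 27 00 00  ; disp32 = 10000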
@@ -941,6 +941,15 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
                        NameOfXMMRegister(vvvv));
         current += PrintRightXMMOperand(current);
         break;
+      case 0x6f:
+        AppendToBuffer("vmovdqu %s,", NameOfXMMRegister(regop));
+        current += PrintRightOperand(current);
+        break;
+      case 0x7f:
+        AppendToBuffer("vmovdqu ");
+        current += PrintRightOperand(current);
+        AppendToBuffer(",%s", NameOfXMMRegister(regop));
+        break;
       default:
         UnimplementedInstruction();
     }
...
@@ -214,6 +214,8 @@ class TurboAssembler : public Assembler {
     }                                                  \
   }
+  AVX_OP2_WITH_TYPE(Movdqu, movdqu, XMMRegister, const Operand&)
+  AVX_OP2_WITH_TYPE(Movdqu, movdqu, const Operand&, XMMRegister)
   AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, Register)
   AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, const Operand&)
   AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister)
...
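
Each AVX_OP2_WITH_TYPE(Movdqu, movdqu, ...) line above expands to a Movdqu overload that dispatches on CPU support at code-generation time, which is what lets the code generator call Movdqu unconditionally for kIA32Movdqu. Roughly, the expansion behaves like this simplified sketch (an approximation of the macro's shape, not its literal text):

    // Approximate expansion of
    // AVX_OP2_WITH_TYPE(Movdqu, movdqu, XMMRegister, const Operand&).
    void Movdqu(XMMRegister dst, const Operand& src) {
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope scope(this, AVX);
        vmovdqu(dst, src);  // AVX encoding
      } else {
        movdqu(dst, src);   // SSE2 encoding
      }
    }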
@@ -44,6 +44,8 @@
   V(pxor, 66, 0F, EF)
 #define SSSE3_INSTRUCTION_LIST(V) \
+  V(phaddd, 66, 0F, 38, 02)       \
+  V(phaddw, 66, 0F, 38, 01)       \
   V(pshufb, 66, 0F, 38, 00)       \
   V(psignb, 66, 0F, 38, 08)       \
   V(psignw, 66, 0F, 38, 09)       \
...
@@ -667,6 +667,8 @@ TEST(DisasmIa320) {
     __ vcvttps2dq(xmm1, xmm0);
     __ vcvttps2dq(xmm1, Operand(ebx, ecx, times_4, 10000));
+    __ vmovdqu(xmm0, Operand(ebx, ecx, times_4, 10000));
+    __ vmovdqu(Operand(ebx, ecx, times_4, 10000), xmm0);
     __ vmovd(xmm0, edi);
     __ vmovd(xmm0, Operand(ebx, ecx, times_4, 10000));
     __ vmovd(eax, xmm1);
...
@@ -1610,6 +1610,8 @@ WASM_SIMD_SELECT_TEST(8x16)
 WASM_SIMD_NON_CANONICAL_SELECT_TEST(32x4)
 WASM_SIMD_NON_CANONICAL_SELECT_TEST(16x8)
 WASM_SIMD_NON_CANONICAL_SELECT_TEST(8x16)
+#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
+        // V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
 // Test binary ops with two lane test patterns, all lanes distinct.
 template <typename T>
@@ -1653,8 +1655,6 @@ WASM_SIMD_COMPILED_TEST(I16x8AddHoriz) {
   RunBinaryLaneOpTest<int16_t>(execution_mode, kExprI16x8AddHoriz,
                                {{1, 5, 9, 13, 17, 21, 25, 29}});
 }
-#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
-        // V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
 #if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
     V8_TARGET_ARCH_MIPS64
...
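
The expected lanes follow directly from the horizontal-add semantics: assuming the distinct-lane pattern fills the first operand with 0..7 and the second with 8..15, I16x8AddHoriz produces {0+1, 2+3, 4+5, 6+7, 8+9, 10+11, 12+13, 14+15} = {1, 5, 9, 13, 17, 21, 25, 29}, exactly the expectation passed to RunBinaryLaneOpTest above.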