Commit 2857e783, authored by jing.bao and committed by Commit Bot

[ia32][wasm] Add I32x4AddHoriz and I16x8AddHoriz

Add phaddd and phaddw, plus their AVX versions (vphaddd, vphaddw).
Add vmovdqu and the Movdqu macro.

Bug: 
Change-Id: I4f5c0cf96ab481fc18f0a0d554101a996a16c954
Reviewed-on: https://chromium-review.googlesource.com/715677
Commit-Queue: Jing Bao <jing.bao@intel.com>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48621}
parent b0a8e5f1
......@@ -1863,6 +1863,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ movss(operand, i.InputDoubleRegister(index));
}
break;
case kIA32Movdqu:
if (instr->HasOutput()) {
__ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
} else {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Movdqu(operand, i.InputSimd128Register(index));
}
break;
case kIA32BitcastFI:
if (instr->InputAt(0)->IsFPStackSlot()) {
__ mov(i.OutputRegister(), i.InputOperand(0));
......@@ -2040,6 +2049,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(1));
break;
}
case kSSEI32x4AddHoriz: {
CpuFeatureScope sse_scope(tasm(), SSSE3);
__ phaddd(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI32x4AddHoriz: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI32x4Sub: {
__ psubd(i.OutputSimd128Register(), i.InputOperand(1));
break;
......@@ -2226,6 +2246,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(2), i.InputInt8(1));
break;
}
case kSSEI16x8AddHoriz: {
CpuFeatureScope sse_scope(tasm(), SSSE3);
__ phaddw(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI16x8AddHoriz: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kIA32I8x16Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));
......
......@@ -103,6 +103,7 @@ namespace compiler {
V(IA32Movl) \
V(IA32Movss) \
V(IA32Movsd) \
V(IA32Movdqu) \
V(IA32BitcastFI) \
V(IA32BitcastIF) \
V(IA32Lea) \
......@@ -122,6 +123,8 @@ namespace compiler {
V(AVXI32x4ShrS) \
V(SSEI32x4Add) \
V(AVXI32x4Add) \
V(SSEI32x4AddHoriz) \
V(AVXI32x4AddHoriz) \
V(SSEI32x4Sub) \
V(AVXI32x4Sub) \
V(SSEI32x4Mul) \
......@@ -152,6 +155,8 @@ namespace compiler {
V(IA32I16x8ExtractLane) \
V(SSEI16x8ReplaceLane) \
V(AVXI16x8ReplaceLane) \
V(SSEI16x8AddHoriz) \
V(AVXI16x8AddHoriz) \
V(IA32I8x16Splat) \
V(IA32I8x16ExtractLane) \
V(SSEI8x16ReplaceLane) \
......
......@@ -108,6 +108,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4ShrS:
case kSSEI32x4Add:
case kAVXI32x4Add:
case kSSEI32x4AddHoriz:
case kAVXI32x4AddHoriz:
case kSSEI32x4Sub:
case kAVXI32x4Sub:
case kSSEI32x4Mul:
......@@ -138,6 +140,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I16x8ExtractLane:
case kSSEI16x8ReplaceLane:
case kAVXI16x8ReplaceLane:
case kSSEI16x8AddHoriz:
case kAVXI16x8AddHoriz:
case kIA32I8x16Splat:
case kIA32I8x16ExtractLane:
case kSSEI8x16ReplaceLane:
......@@ -161,6 +165,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Movl:
case kIA32Movss:
case kIA32Movsd:
case kIA32Movdqu:
// Moves are used for memory load/store operations.
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
......
......@@ -250,8 +250,10 @@ void InstructionSelector::VisitLoad(Node* node) {
case MachineRepresentation::kWord32:
opcode = kIA32Movl;
break;
case MachineRepresentation::kSimd128:
opcode = kIA32Movdqu;
break;
case MachineRepresentation::kWord64: // Fall through.
case MachineRepresentation::kSimd128: // Fall through.
case MachineRepresentation::kNone:
UNREACHABLE();
return;
......@@ -340,8 +342,10 @@ void InstructionSelector::VisitStore(Node* node) {
case MachineRepresentation::kWord32:
opcode = kIA32Movl;
break;
case MachineRepresentation::kSimd128:
opcode = kIA32Movdqu;
break;
case MachineRepresentation::kWord64: // Fall through.
case MachineRepresentation::kSimd128: // Fall through.
case MachineRepresentation::kNone:
UNREACHABLE();
return;
......@@ -1905,6 +1909,7 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_BINOP_LIST(V) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
......@@ -1916,7 +1921,8 @@ VISIT_ATOMIC_BINOP(Xor)
V(I32x4MinU) \
V(I32x4MaxU) \
V(I32x4GtU) \
V(I32x4GeU)
V(I32x4GeU) \
V(I16x8AddHoriz)
#define SIMD_UNOP_LIST(V) V(I32x4Neg)
......
......@@ -2150,15 +2150,11 @@ void InstructionSelector::VisitI32x4MinU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4MaxU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4ShrU(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
// Stub visitor for the I32x4AddHoriz node: the body is only UNIMPLEMENTED(),
// and `node` is not inspected. It sits inside an architecture #if guard;
// NOTE(review): the exact guard is ambiguous in this diff view — confirm
// against the real file before relying on which targets compile this stub.
void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
// && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
......@@ -2248,10 +2244,11 @@ void InstructionSelector::VisitI16x8SubSaturateS(Node* node) {
// && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
!V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
// Stub visitor for the I16x8AddHoriz node: the body is only UNIMPLEMENTED(),
// and `node` is not inspected. It sits inside an architecture #if guard;
// NOTE(review): the guard lines above mix old and new diff content here —
// confirm the effective condition against the real file.
void InstructionSelector::VisitI16x8AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
// && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
......
......@@ -1453,6 +1453,12 @@ class Assembler : public AssemblerBase {
vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
}
// Load form of vmovdqu (unaligned double-quadword move): emits opcode 0x6F
// with the F3 / 0F / WIG encoding selectors, moving from operand `src` into
// XMM register `dst`.
void vmovdqu(XMMRegister dst, const Operand& src) {
// xmm0 is passed as vinstr's second register argument; presumably it is a
// placeholder for an unused second source — TODO confirm against vinstr.
vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG);
}
// Store form of vmovdqu (unaligned double-quadword move): emits opcode 0x7F
// with the F3 / 0F / WIG encoding selectors, moving XMM register `src` to
// operand `dst`.
void vmovdqu(const Operand& dst, XMMRegister src) {
// Argument order is deliberately swapped relative to the load form: the
// register `src` goes in vinstr's first (register) slot and the destination
// operand in its operand slot. xmm0 is presumably a placeholder for an unused
// second source — TODO confirm against vinstr.
vinstr(0x7F, src, xmm0, dst, kF3, k0F, kWIG);
}
void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); }
void vmovd(XMMRegister dst, const Operand& src) {
vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG);
......
......@@ -941,6 +941,15 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x6f:
AppendToBuffer("vmovdqu %s,", NameOfXMMRegister(regop));
current += PrintRightOperand(current);
break;
case 0x7f:
AppendToBuffer("vmovdqu ");
current += PrintRightOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
default:
UnimplementedInstruction();
}
......
......@@ -214,6 +214,8 @@ class TurboAssembler : public Assembler {
} \
}
AVX_OP2_WITH_TYPE(Movdqu, movdqu, XMMRegister, const Operand&)
AVX_OP2_WITH_TYPE(Movdqu, movdqu, const Operand&, XMMRegister)
AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, Register)
AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, const Operand&)
AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister)
......
......@@ -44,6 +44,8 @@
V(pxor, 66, 0F, EF)
#define SSSE3_INSTRUCTION_LIST(V) \
V(phaddd, 66, 0F, 38, 02) \
V(phaddw, 66, 0F, 38, 01) \
V(pshufb, 66, 0F, 38, 00) \
V(psignb, 66, 0F, 38, 08) \
V(psignw, 66, 0F, 38, 09) \
......
......@@ -667,6 +667,8 @@ TEST(DisasmIa320) {
__ vcvttps2dq(xmm1, xmm0);
__ vcvttps2dq(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmovdqu(xmm0, Operand(ebx, ecx, times_4, 10000));
__ vmovdqu(Operand(ebx, ecx, times_4, 10000), xmm0);
__ vmovd(xmm0, edi);
__ vmovd(xmm0, Operand(ebx, ecx, times_4, 10000));
__ vmovd(eax, xmm1);
......
......@@ -1610,6 +1610,8 @@ WASM_SIMD_SELECT_TEST(8x16)
WASM_SIMD_NON_CANONICAL_SELECT_TEST(32x4)
WASM_SIMD_NON_CANONICAL_SELECT_TEST(16x8)
WASM_SIMD_NON_CANONICAL_SELECT_TEST(8x16)
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
// V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
// Test binary ops with two lane test patterns, all lanes distinct.
template <typename T>
......@@ -1653,8 +1655,6 @@ WASM_SIMD_COMPILED_TEST(I16x8AddHoriz) {
RunBinaryLaneOpTest<int16_t>(execution_mode, kExprI16x8AddHoriz,
{{1, 5, 9, 13, 17, 21, 25, 29}});
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
// V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment