Commit d997955c authored by jing.bao, committed by Commit Bot

[ia32][wasm] Add packing integer conversions

I16x8SConvertI32x4, I16x8UConvertI32x4,
I8x16SConvertI16x8, I8x16UConvertI16x8

Add packsswb/packssdw/packuswb/packusdw

Change-Id: Ibb661a20fa032d732fec20b3d48190f44d2d4bd4
Reviewed-on: https://chromium-review.googlesource.com/1027123
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#52870}
parent 6399da58
......@@ -2265,6 +2265,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputInt8(1));
break;
}
// I16x8SConvertI32x4, SSE form: a single packssdw whose destination is the
// output register and whose source is input 1.
case kSSEI16x8SConvertI32x4: {
// Two-operand form: the destination doubles as the first source, so the
// output register has to be input 0.
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// Per the Intel SDM, packssdw narrows 32-bit lanes to 16-bit lanes with
// signed saturation.
__ packssdw(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
// I16x8SConvertI32x4, AVX form: a single three-operand vpackssdw, so the
// output register is passed separately from input 0.
case kAVXI16x8SConvertI32x4: {
CpuFeatureScope avx_scope(tasm(), AVX);
// Per the Intel SDM, vpackssdw narrows 32-bit lanes to 16-bit lanes with
// signed saturation.
__ vpackssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kSSEI16x8Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ paddw(i.OutputSimd128Register(), i.InputOperand(1));
......@@ -2419,6 +2430,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputInt8(1));
break;
}
// I16x8UConvertI32x4, SSE4.1 form: clamps both inputs to 0x7FFFFFFF with an
// unsigned min and then packs them with packusdw.
case kSSEI16x8UConvertI32x4: {
// Two-operand form: the output register has to be input 0.
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFFFFFF
// Per the Intel SDM, packusdw reads its sources as signed 32-bit values, so
// a lane with the top bit set would saturate to 0; after the clamp it
// saturates to 0xFFFF instead.
// scratch = all ones.
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
// scratch = 0x7FFFFFFF in every 32-bit lane.
__ psrld(kScratchDoubleReg, 1);
// dst = unsigned min(input 0, 0x7FFFFFFF).
__ pminud(dst, kScratchDoubleReg);
// scratch = unsigned min(0x7FFFFFFF, input 1); the mask is no longer needed.
__ pminud(kScratchDoubleReg, i.InputOperand(1));
__ packusdw(dst, kScratchDoubleReg);
break;
}
// I16x8UConvertI32x4, AVX form: clamps both inputs to 0x7FFFFFFF with an
// unsigned min and then packs them with vpackusdw.
case kAVXI16x8UConvertI32x4: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFFFFFF
// Per the Intel SDM, vpackusdw reads its sources as signed 32-bit values, so
// a lane with the top bit set would saturate to 0; after the clamp it
// saturates to 0xFFFF instead.
// scratch = all ones.
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// scratch = 0x7FFFFFFF in every 32-bit lane.
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
// dst = unsigned min(0x7FFFFFFF, input 0).
// NOTE(review): dst is written here before input 1 is read on the next
// line. If dst can be allocated to the same register as input 1, input 1
// is clobbered -- confirm the selector's operand constraints rule that out.
__ vpminud(dst, kScratchDoubleReg, i.InputSimd128Register(0));
// scratch = unsigned min(0x7FFFFFFF, input 1); the mask is no longer needed.
__ vpminud(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
__ vpackusdw(dst, dst, kScratchDoubleReg);
break;
}
case kSSEI16x8AddSaturateU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ paddusw(i.OutputSimd128Register(), i.InputOperand(1));
......@@ -2529,6 +2563,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(2), i.InputInt8(1));
break;
}
// I8x16SConvertI16x8, SSE form: a single packsswb whose destination is the
// output register and whose source is input 1.
case kSSEI8x16SConvertI16x8: {
// Two-operand form: the destination doubles as the first source, so the
// output register has to be input 0.
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// Per the Intel SDM, packsswb narrows 16-bit lanes to 8-bit lanes with
// signed saturation.
__ packsswb(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
// I8x16SConvertI16x8, AVX form: a single three-operand vpacksswb, so the
// output register is passed separately from input 0.
case kAVXI8x16SConvertI16x8: {
CpuFeatureScope avx_scope(tasm(), AVX);
// Per the Intel SDM, vpacksswb narrows 16-bit lanes to 8-bit lanes with
// signed saturation.
__ vpacksswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kIA32I8x16Neg: {
XMMRegister dst = i.OutputSimd128Register();
Operand src = i.InputOperand(0);
......@@ -2866,6 +2911,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
break;
}
// I8x16UConvertI16x8, SSE4.1 form: clamps both inputs to 0x7FFF with an
// unsigned min and then packs them with packuswb.
case kSSEI8x16UConvertI16x8: {
// Two-operand form: the output register has to be input 0.
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFF
// Per the Intel SDM, packuswb reads its sources as signed 16-bit values, so
// a lane with the top bit set would saturate to 0; after the clamp it
// saturates to 0xFF instead.
// scratch = all ones.
__ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
// scratch = 0x7FFF in every 16-bit lane.
__ psrlw(kScratchDoubleReg, 1);
// dst = unsigned min(input 0, 0x7FFF).
__ pminuw(dst, kScratchDoubleReg);
// scratch = unsigned min(0x7FFF, input 1); the mask is no longer needed.
__ pminuw(kScratchDoubleReg, i.InputOperand(1));
__ packuswb(dst, kScratchDoubleReg);
break;
}
// I8x16UConvertI16x8, AVX form: clamps both inputs to 0x7FFF with an
// unsigned min and then packs them with vpackuswb.
case kAVXI8x16UConvertI16x8: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFF
// Per the Intel SDM, vpackuswb reads its sources as signed 16-bit values, so
// a lane with the top bit set would saturate to 0; after the clamp it
// saturates to 0xFF instead.
// scratch = all ones.
__ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// scratch = 0x7FFF in every 16-bit lane.
__ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 1);
// dst = unsigned min(0x7FFF, input 0).
// NOTE(review): dst is written here before input 1 is read on the next
// line. If dst can be allocated to the same register as input 1, input 1
// is clobbered -- confirm the selector's operand constraints rule that out.
__ vpminuw(dst, kScratchDoubleReg, i.InputSimd128Register(0));
// scratch = unsigned min(0x7FFF, input 1); the mask is no longer needed.
__ vpminuw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
__ vpackuswb(dst, dst, kScratchDoubleReg);
break;
}
case kSSEI8x16AddSaturateU: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ paddusb(i.OutputSimd128Register(), i.InputOperand(1));
......
......@@ -198,6 +198,8 @@ namespace compiler {
V(AVXI16x8Shl) \
V(SSEI16x8ShrS) \
V(AVXI16x8ShrS) \
V(SSEI16x8SConvertI32x4) \
V(AVXI16x8SConvertI32x4) \
V(SSEI16x8Add) \
V(AVXI16x8Add) \
V(SSEI16x8AddSaturateS) \
......@@ -224,6 +226,8 @@ namespace compiler {
V(AVXI16x8GeS) \
V(SSEI16x8ShrU) \
V(AVXI16x8ShrU) \
V(SSEI16x8UConvertI32x4) \
V(AVXI16x8UConvertI32x4) \
V(SSEI16x8AddSaturateU) \
V(AVXI16x8AddSaturateU) \
V(SSEI16x8SubSaturateU) \
......@@ -240,11 +244,13 @@ namespace compiler {
V(IA32I8x16ExtractLane) \
V(SSEI8x16ReplaceLane) \
V(AVXI8x16ReplaceLane) \
V(SSEI8x16SConvertI16x8) \
V(AVXI8x16SConvertI16x8) \
V(IA32I8x16Neg) \
V(SSEI8x16Shl) \
V(AVXI8x16Shl) \
V(SSEI8x16ShrS) \
V(AVXI8x16ShrS) \
V(IA32I8x16Neg) \
V(SSEI8x16Add) \
V(AVXI8x16Add) \
V(SSEI8x16AddSaturateS) \
......@@ -267,6 +273,8 @@ namespace compiler {
V(AVXI8x16GtS) \
V(SSEI8x16GeS) \
V(AVXI8x16GeS) \
V(SSEI8x16UConvertI16x8) \
V(AVXI8x16UConvertI16x8) \
V(SSEI8x16AddSaturateU) \
V(AVXI8x16AddSaturateU) \
V(SSEI8x16SubSaturateU) \
......
......@@ -180,6 +180,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI16x8Shl:
case kSSEI16x8ShrS:
case kAVXI16x8ShrS:
case kSSEI16x8SConvertI32x4:
case kAVXI16x8SConvertI32x4:
case kSSEI16x8Add:
case kAVXI16x8Add:
case kSSEI16x8AddSaturateS:
......@@ -206,6 +208,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI16x8GeS:
case kSSEI16x8ShrU:
case kAVXI16x8ShrU:
case kSSEI16x8UConvertI32x4:
case kAVXI16x8UConvertI32x4:
case kSSEI16x8AddSaturateU:
case kAVXI16x8AddSaturateU:
case kSSEI16x8SubSaturateU:
......@@ -222,6 +226,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I8x16ExtractLane:
case kSSEI8x16ReplaceLane:
case kAVXI8x16ReplaceLane:
case kSSEI8x16SConvertI16x8:
case kAVXI8x16SConvertI16x8:
case kIA32I8x16Neg:
case kSSEI8x16Shl:
case kAVXI8x16Shl:
......@@ -249,6 +255,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI8x16GtS:
case kSSEI8x16GeS:
case kAVXI8x16GeS:
case kSSEI8x16UConvertI16x8:
case kAVXI8x16UConvertI16x8:
case kSSEI8x16AddSaturateU:
case kAVXI8x16AddSaturateU:
case kSSEI8x16SubSaturateU:
......
......@@ -1757,6 +1757,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I32x4MaxU) \
V(I32x4GtU) \
V(I32x4GeU) \
V(I16x8SConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSaturateS) \
V(I16x8AddHoriz) \
......@@ -1769,12 +1770,14 @@ VISIT_ATOMIC_BINOP(Xor)
V(I16x8Ne) \
V(I16x8GtS) \
V(I16x8GeS) \
V(I16x8UConvertI32x4) \
V(I16x8AddSaturateU) \
V(I16x8SubSaturateU) \
V(I16x8MinU) \
V(I16x8MaxU) \
V(I16x8GtU) \
V(I16x8GeU) \
V(I8x16SConvertI16x8) \
V(I8x16Add) \
V(I8x16AddSaturateS) \
V(I8x16Sub) \
......@@ -1785,6 +1788,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I8x16Ne) \
V(I8x16GtS) \
V(I8x16GeS) \
V(I8x16UConvertI16x8) \
V(I8x16AddSaturateU) \
V(I8x16SubSaturateU) \
V(I8x16MinU) \
......
......@@ -2381,23 +2381,23 @@ void InstructionSelector::VisitI16x8SConvertI8x16High(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8SConvertI32x4(Node* node) {
void InstructionSelector::VisitI16x8UConvertI8x16Low(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8UConvertI8x16Low(Node* node) {
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI16x8SConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) {
void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
......@@ -2409,7 +2409,7 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
......
......@@ -6,6 +6,9 @@
#define V8_IA32_SSE_INSTR_H_
#define SSE2_INSTRUCTION_LIST(V) \
V(packsswb, 66, 0F, 63) \
V(packssdw, 66, 0F, 6B) \
V(packuswb, 66, 0F, 67) \
V(paddb, 66, 0F, FC) \
V(paddw, 66, 0F, FD) \
V(paddd, 66, 0F, FE) \
......@@ -52,6 +55,7 @@
V(psignd, 66, 0F, 38, 0A)
#define SSE4_INSTRUCTION_LIST(V) \
V(packusdw, 66, 0F, 38, 2B) \
V(pminsb, 66, 0F, 38, 38) \
V(pminsd, 66, 0F, 38, 39) \
V(pminuw, 66, 0F, 38, 3A) \
......
......@@ -1087,7 +1087,7 @@ WASM_SIMD_TEST(I16x8Neg) {
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion from I32x4 (packing).
WASM_SIMD_TEST(I16x8ConvertI32x4) {
WasmRunner<int32_t, int32_t, int32_t, int32_t> r(kExecuteTurbofan,
......@@ -1117,7 +1117,7 @@ WASM_SIMD_TEST(I16x8ConvertI32x4) {
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
void RunI16x8BinOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
Int16BinOp expected_op) {
......@@ -1281,7 +1281,7 @@ WASM_SIMD_TEST(I8x16Neg) {
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion from I16x8 (packing).
WASM_SIMD_TEST(I8x16ConvertI16x8) {
WasmRunner<int32_t, int32_t, int32_t, int32_t> r(kExecuteTurbofan,
......@@ -1311,7 +1311,7 @@ WASM_SIMD_TEST(I8x16ConvertI16x8) {
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
void RunI8x16BinOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
Int8BinOp expected_op) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment