Commit 539fee65 authored by jing.bao, committed by Commit Bot

[ia32][wasm] Add F32x4SConvertI32x4, F32x4UConvertI32x4

Add Cvtdq2ps macro.
Add pblendw/vpblendw.

Change-Id: I5c8232d17c220fbbb4845cbfad4ce765f0bbbb90
Reviewed-on: https://chromium-review.googlesource.com/961973
Commit-Queue: Jing Bao <jing.bao@intel.com>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#52255}
parent caf74f9c
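
Editorial note on the approach (not part of the original commit): cvtdq2ps interprets its input lanes as signed 32-bit integers, so the signed conversion is a single instruction, while the unsigned conversion has to split each lane into 16-bit halves. A minimal scalar sketch of the intended per-lane semantics, assuming standard C++ conversion rules:

#include <cstdint>

// Signed lane: what Cvtdq2ps computes directly
// (exact for |x| <= 2^24, otherwise rounded once).
float SConvertLane(int32_t x) { return static_cast<float>(x); }

// Unsigned lane: a signed convert of x >= 2^31 would produce a negative
// float, which is why the SSE/AVX sequences below decompose each lane.
float UConvertLane(uint32_t x) { return static_cast<float>(x); }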
@@ -1768,6 +1768,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(2), i.InputInt8(1) << 4);
break;
}
case kIA32F32x4SConvertI32x4: {
__ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kSSEF32x4UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
__ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
__ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
__ psubd(dst, kScratchDoubleReg); // get hi 16 bits
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ psrld(dst, 1); // divide by 2 to get in unsigned range
__ cvtdq2ps(dst, dst); // convert hi exactly
__ addps(dst, dst); // double hi, exactly
__ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
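// Editorial note (not part of the original diff): per lane, for an
// unsigned 32-bit x, the sequence above computes
//   lo = x & 0xFFFF;    // pblendw with mask 0x55 keeps the low words
//   hi = x - lo;        // low 16 bits are now zero
//   result = 2.0f * float(hi >> 1) + float(lo);
// lo and hi >> 1 each have at most 16 significant bits, so both cvtdq2ps
// conversions are exact, and doubling a float is exact; the final addps
// is the only step that can round.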
case kAVXF32x4UConvertI32x4: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
__ vpxor(kScratchDoubleReg, kScratchDoubleReg,
kScratchDoubleReg); // zeros
__ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
0x55); // get lo 16 bits
__ vpsubd(dst, src, kScratchDoubleReg); // get hi 16 bits
__ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ vpsrld(dst, dst, 1); // divide by 2 to get in unsigned range
__ vcvtdq2ps(dst, dst); // convert hi exactly
__ vaddps(dst, dst, dst); // double hi, exactly
__ vaddps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
case kSSEF32x4Abs: {
XMMRegister dst = i.OutputSimd128Register();
Operand src = i.InputOperand(0);
...
@@ -121,6 +121,9 @@ namespace compiler {
V(AVXF32x4ExtractLane) \
V(SSEF32x4ReplaceLane) \
V(AVXF32x4ReplaceLane) \
V(IA32F32x4SConvertI32x4) \
V(SSEF32x4UConvertI32x4) \
V(AVXF32x4UConvertI32x4) \
V(SSEF32x4Abs) \
V(AVXF32x4Abs) \
V(SSEF32x4Neg) \
...
@@ -103,6 +103,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4ExtractLane:
case kSSEF32x4ReplaceLane:
case kAVXF32x4ReplaceLane:
case kIA32F32x4SConvertI32x4:
case kSSEF32x4UConvertI32x4:
case kAVXF32x4UConvertI32x4:
case kSSEF32x4Abs:
case kAVXF32x4Abs:
case kSSEF32x4Neg:
...
@@ -1793,6 +1793,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(S128Xor)
#define SIMD_INT_UNOP_LIST(V) \
V(F32x4SConvertI32x4) \
V(I32x4Neg) \
V(I16x8Neg) \
V(I8x16Neg)
@@ -1832,6 +1833,16 @@ void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
}
}
void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
if (IsSupported(AVX)) {
Emit(kAVXF32x4UConvertI32x4, g.DefineAsRegister(node), operand0);
} else {
Emit(kSSEF32x4UConvertI32x4, g.DefineSameAsFirst(node), operand0);
}
}
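// Editorial note: the SSE sequence is destructive (it overwrites its
// input register), hence DefineSameAsFirst ties the output to input 0;
// the AVX encoding is three-operand, so DefineAsRegister leaves the
// register allocator free to pick any output.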
#define SIMD_I8X16_SHIFT_OPCODES(V) \
V(I8x16Shl) \
V(I8x16ShrS) \
...
@@ -2306,7 +2306,7 @@ void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
#endif  // V8_TARGET_ARCH_64_BIT
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
-    !V8_TARGET_ARCH_MIPS64
+    !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
@@ -2315,7 +2315,7 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
-// && !V8_TARGET_ARCH_MIPS64
+// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitWord64AtomicLoad(Node* node) { UNIMPLEMENTED(); }
...
@@ -2725,6 +2725,17 @@ void Assembler::pshufd(XMMRegister dst, Operand src, uint8_t shuffle) {
EMIT(shuffle);
}
void Assembler::pblendw(XMMRegister dst, Operand src, uint8_t mask) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
EMIT(0x66);
EMIT(0x0F);
EMIT(0x3A);
EMIT(0x0E);
emit_sse_operand(dst, src);
EMIT(mask);
}
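// Editorial note: the bytes above are the SSE4.1 encoding
// 66 0F 3A 0E /r ib. As a worked example (assuming the usual ModRM
// layout), pblendw(xmm5, xmm1, 5) should assemble to 66 0F 3A 0E E9 05,
// where 0xE9 encodes mod=11, reg=xmm5, rm=xmm1.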
void Assembler::pextrb(Operand dst, XMMRegister src, int8_t offset) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
@@ -2959,6 +2970,12 @@ void Assembler::vpshufd(XMMRegister dst, Operand src, uint8_t shuffle) {
EMIT(shuffle);
}
void Assembler::vpblendw(XMMRegister dst, XMMRegister src1, Operand src2,
uint8_t mask) {
vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
EMIT(mask);
}
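// Editorial note: the vinstr arguments correspond to the AVX form
// VPBLENDW xmm1, xmm2, xmm3/m128, imm8 (VEX.128.66.0F3A.WIG 0E /r ib),
// with src1 carried in the VEX.vvvv field.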
void Assembler::vpextrb(Operand dst, XMMRegister src, int8_t offset) {
vinstr(0x14, src, xmm0, dst, k66, k0F3A, kWIG);
EMIT(offset);
...
@@ -1131,6 +1131,11 @@ class Assembler : public AssemblerBase {
}
void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
pblendw(dst, Operand(src), mask);
}
void pblendw(XMMRegister dst, Operand src, uint8_t mask);
void pextrb(Register dst, XMMRegister src, int8_t offset) {
pextrb(Operand(dst), src, offset);
}
@@ -1439,6 +1444,12 @@
}
void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);
void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
uint8_t mask) {
vpblendw(dst, src1, Operand(src2), mask);
}
void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
void vpextrb(Register dst, XMMRegister src, int8_t offset) {
vpextrb(Operand(dst), src, offset);
}
...
@@ -819,6 +819,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x0E:
AppendToBuffer("vpblendw %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%d", *reinterpret_cast<uint8_t*>(current));
current++;
break;
case 0x14:
AppendToBuffer("vpextrb ");
current += PrintRightOperand(current);
@@ -1961,6 +1968,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0x0E) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pblendw %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
AppendToBuffer(",%d", *reinterpret_cast<uint8_t*>(data));
data++;
} else if (*data == 0x14) {
data++;
int mod, regop, rm;
...
@@ -228,6 +228,7 @@ class TurboAssembler : public Assembler {
AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, Operand)
AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister)
AVX_OP2_WITH_TYPE(Movd, movd, Operand, XMMRegister)
AVX_OP2_WITH_TYPE(Cvtdq2ps, cvtdq2ps, XMMRegister, Operand)
#undef AVX_OP2_WITH_TYPE
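// Editorial note: AVX_OP2_WITH_TYPE expands Cvtdq2ps into a wrapper that
// emits vcvtdq2ps when AVX is available and falls back to plain cvtdq2ps
// otherwise, which lets the code generator share a single
// kIA32F32x4SConvertI32x4 case across both instruction sets.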
......
@@ -573,6 +573,8 @@ TEST(DisasmIa320) {
{
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope scope(&assm, SSE4_1);
__ pblendw(xmm5, xmm1, 5);
__ pblendw(xmm5, Operand(edx, 4), 5);
__ pextrb(eax, xmm0, 1);
__ pextrb(Operand(edx, 4), xmm0, 1);
__ pextrw(eax, xmm0, 1);
@@ -689,6 +691,8 @@
__ vpshuflw(xmm5, Operand(edx, 4), 5);
__ vpshufd(xmm5, xmm1, 5);
__ vpshufd(xmm5, Operand(edx, 4), 5);
__ vpblendw(xmm5, xmm1, xmm0, 5);
__ vpblendw(xmm5, xmm1, Operand(edx, 4), 5);
__ vpextrb(eax, xmm0, 1);
__ vpextrb(Operand(edx, 4), xmm0, 1);
__ vpextrw(eax, xmm0, 1);
...
@@ -446,7 +446,7 @@ WASM_SIMD_TEST(F32x4ReplaceLane) {
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
-    V8_TARGET_ARCH_MIPS64
+    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion.
WASM_SIMD_TEST(F32x4ConvertI32x4) {
WasmRunner<int32_t, int32_t, float, float> r(kExecuteTurbofan, lower_simd);
@@ -471,7 +471,7 @@ WASM_SIMD_TEST(F32x4ConvertI32x4) {
}
}
#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
-// V8_TARGET_ARCH_MIPS64
+// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
void RunF32x4UnOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
FloatUnOp expected_op, float error = 0.0f) {
...