Commit d25b4f29 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32] Bitmask instructions

Implement i8x16.bitmask, i16x8.bitmask, i32x4.bitmask on ia32.

Drive by additions of disasm and disasm tests to some instructions.

Bug: v8:10308
Change-Id: I3725ed6959ae55f96ee7950130776a4f08e177c9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2127314
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66989}
parent 15674cef
......@@ -2266,6 +2266,14 @@ void Assembler::movmskps(Register dst, XMMRegister src) {
emit_sse_operand(dst, src);
}
// Emits PMOVMSKB dst, src (SSE2 form, encoding 66 0F D7 /r): packs the most
// significant bit of each of the 16 bytes of src into the low 16 bits of dst.
void Assembler::pmovmskb(Register dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x66);  // operand-size prefix: selects the SSE2 (xmm) form of the opcode
EMIT(0x0F);
EMIT(0xD7);  // opcode byte for pmovmskb
emit_sse_operand(dst, src);  // ModR/M: dst in the reg field, src in r/m
}
void Assembler::maxsd(XMMRegister dst, Operand src) {
EnsureSpace ensure_space(this);
EMIT(0xF2);
......@@ -2894,6 +2902,22 @@ void Assembler::vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2,
EMIT(offset);
}
// Emits VMOVMSKPS dst, src (AVX, VEX.128.0F.WIG 50 /r): extracts the sign bit
// of each of the four packed single-precision lanes of src into the low 4 bits
// of dst. Requires AVX to be enabled.
void Assembler::vmovmskps(Register dst, XMMRegister src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
// vvvv operand is unused by this instruction, so pass xmm0 as the placeholder.
emit_vex_prefix(xmm0, kL128, kNone, k0F, kWIG);
EMIT(0x50);  // opcode byte for (v)movmskps
emit_sse_operand(dst, src);
}
// Emits VPMOVMSKB dst, src (AVX, VEX.128.66.0F.WIG D7 /r): packs the most
// significant bit of each of the 16 bytes of src into the low 16 bits of dst.
// Requires AVX to be enabled.
void Assembler::vpmovmskb(Register dst, XMMRegister src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
// vvvv operand is unused by this instruction, so pass xmm0 as the placeholder.
emit_vex_prefix(xmm0, kL128, k66, k0F, kWIG);
EMIT(0xD7);  // opcode byte for (v)pmovmskb
emit_sse_operand(dst, src);
}
void Assembler::bmi1(byte op, Register reg, Register vreg, Operand rm) {
DCHECK(IsEnabled(BMI1));
EnsureSpace ensure_space(this);
......
......@@ -958,6 +958,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void movmskpd(Register dst, XMMRegister src);
void movmskps(Register dst, XMMRegister src);
void pmovmskb(Register dst, XMMRegister src);
void cmpltsd(XMMRegister dst, XMMRegister src);
void maxsd(XMMRegister dst, XMMRegister src) { maxsd(dst, Operand(src)); }
......@@ -1439,6 +1441,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x7E, src, xmm0, dst, k66, k0F, kWIG);
}
void vmovmskps(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src);
// BMI instruction
void andn(Register dst, Register src1, Register src2) {
andn(dst, src1, Operand(src2));
......
......@@ -284,6 +284,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP2_WITH_TYPE(Movaps, movaps, XMMRegister, XMMRegister)
AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, XMMRegister)
AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, const Operand&)
AVX_OP2_WITH_TYPE(Pmovmskb, pmovmskb, Register, XMMRegister)
AVX_OP2_WITH_TYPE(Movmskps, movmskps, Register, XMMRegister)
#undef AVX_OP2_WITH_TYPE
......
......@@ -2759,6 +2759,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32I32x4BitMask: {
__ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
break;
}
case kIA32I16x8Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));
......@@ -3093,6 +3097,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32I16x8BitMask: {
Register dst = i.OutputRegister();
XMMRegister tmp = i.TempSimd128Register(0);
__ Packsswb(tmp, i.InputSimd128Register(0));
__ Pmovmskb(dst, tmp);
__ shr(dst, 8);
break;
}
case kIA32I8x16Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));
......@@ -3546,6 +3558,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32I8x16BitMask: {
__ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
break;
}
case kIA32S128Zero: {
XMMRegister dst = i.OutputSimd128Register();
__ Pxor(dst, dst);
......
......@@ -226,6 +226,7 @@ namespace compiler {
V(SSEI32x4GeU) \
V(AVXI32x4GeU) \
V(IA32I32x4Abs) \
V(IA32I32x4BitMask) \
V(IA32I16x8Splat) \
V(IA32I16x8ExtractLaneU) \
V(IA32I16x8ExtractLaneS) \
......@@ -281,6 +282,7 @@ namespace compiler {
V(AVXI16x8GeU) \
V(IA32I16x8RoundingAverageU) \
V(IA32I16x8Abs) \
V(IA32I16x8BitMask) \
V(IA32I8x16Splat) \
V(IA32I8x16ExtractLaneU) \
V(IA32I8x16ExtractLaneS) \
......@@ -330,6 +332,7 @@ namespace compiler {
V(AVXI8x16GeU) \
V(IA32I8x16RoundingAverageU) \
V(IA32I8x16Abs) \
V(IA32I8x16BitMask) \
V(IA32S128Zero) \
V(SSES128Not) \
V(AVXS128Not) \
......
......@@ -207,6 +207,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEI32x4GeU:
case kAVXI32x4GeU:
case kIA32I32x4Abs:
case kIA32I32x4BitMask:
case kIA32I16x8Splat:
case kIA32I16x8ExtractLaneU:
case kIA32I16x8ExtractLaneS:
......@@ -262,6 +263,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI16x8GeU:
case kIA32I16x8RoundingAverageU:
case kIA32I16x8Abs:
case kIA32I16x8BitMask:
case kIA32I8x16Splat:
case kIA32I8x16ExtractLaneU:
case kIA32I8x16ExtractLaneS:
......@@ -311,6 +313,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI8x16GeU:
case kIA32I8x16RoundingAverageU:
case kIA32I8x16Abs:
case kIA32I8x16BitMask:
case kIA32S128Zero:
case kSSES128Not:
case kAVXS128Not:
......
......@@ -2109,6 +2109,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High) \
V(I32x4Abs) \
V(I32x4BitMask) \
V(I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High) \
V(I16x8Neg) \
......@@ -2116,7 +2117,8 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8UConvertI8x16High) \
V(I16x8Abs) \
V(I8x16Neg) \
V(I8x16Abs)
V(I8x16Abs) \
V(I8x16BitMask)
#define SIMD_UNOP_PREFIX_LIST(V) \
V(F32x4Abs) \
......@@ -2439,6 +2441,13 @@ void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
VisitPack(this, node, kAVXI16x8UConvertI32x4, kSSEI16x8UConvertI32x4);
}
// Instruction selection for wasm i16x8.bitmask on ia32. The code generator
// lowers kIA32I16x8BitMask as packsswb(tmp, input) + pmovmskb + shr 8, so a
// temp simd register is requested here and the input is kept in a unique
// register distinct from that temp.
void InstructionSelector::VisitI16x8BitMask(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempSimd128Register()};
Emit(kIA32I16x8BitMask, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps);
}
void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
VisitPack(this, node, kAVXI8x16UConvertI16x8, kSSEI8x16UConvertI16x8);
}
......
......@@ -2634,11 +2634,11 @@ void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxU(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI8x16BitMask(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8BitMask(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -1066,6 +1066,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x50:
AppendToBuffer("vmovmskps %s,%s", NameOfCPURegister(regop),
NameOfXMMRegister(rm));
current++;
break;
case 0x51:
AppendToBuffer("vsqrtps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
......@@ -1266,6 +1271,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer(",%d", Imm8(current));
current++;
break;
case 0xD7:
AppendToBuffer("vpmovmskb %s,%s", NameOfCPURegister(regop),
NameOfXMMRegister(rm));
current++;
break;
#define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
case 0x##opcode: { \
AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop), \
......@@ -2347,6 +2357,13 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
} else if (*data == 0xB1) {
data++;
data += PrintOperands("cmpxchg_w", OPER_REG_OP_ORDER, data);
} else if (*data == 0xD7) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pmovmskb %s,%s", NameOfCPURegister(regop),
NameOfXMMRegister(rm));
data++;
} else {
byte op = *data;
data++;
......
......@@ -546,6 +546,9 @@ TEST(DisasmIa320) {
__ pinsrw(xmm5, edx, 5);
__ pinsrw(xmm5, Operand(edx, 4), 5);
__ movmskps(edx, xmm5);
__ pmovmskb(edx, xmm5);
#define EMIT_SSE2_INSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
......@@ -782,6 +785,10 @@ TEST(DisasmIa320) {
__ vmovd(xmm0, Operand(ebx, ecx, times_4, 10000));
__ vmovd(eax, xmm1);
__ vmovd(Operand(ebx, ecx, times_4, 10000), xmm1);
__ vmovmskps(edx, xmm5);
__ vpmovmskb(ebx, xmm1);
#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ v##instruction(xmm7, xmm5, xmm1); \
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
......
......@@ -1661,7 +1661,7 @@ WASM_SIMD_TEST(I16x8ReplaceLane) {
}
}
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I8x16BitMask) {
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
......@@ -1721,7 +1721,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4BitMask) {
CHECK_EQ(actual, expected);
}
}
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(I8x16Splat) {
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment