Commit 8d797820 authored by Ng Zhi An, committed by Commit Bot

[wasm simd] Implement I64x2ExtractLane and I64x2ReplaceLane for x64

Bug: v8:8460
Change-Id: I995996da621d84465d9bec4fc9e9d40308a92410
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1683929
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62491}
parent a7eb1331
......@@ -3082,6 +3082,42 @@ void Assembler::pextrd(Operand dst, XMMRegister src, int8_t imm8) {
emit(imm8);
}
// Emits a pextrq instruction that moves one 64-bit lane of the XMM register
// |src| into the general-purpose register |dst|; |imm8| is emitted verbatim as
// the trailing immediate byte (the lane selector).
// SSE4.1 must be enabled on this assembler (checked by the DCHECK below).
// Bytes emitted, in order: 0x66, REX prefix, 0x0F, 0x3A, 0x16, operand, imm8.
void Assembler::pextrq(Register dst, XMMRegister src, int8_t imm8) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
emit(0x66);
// The XMM source is passed first and the GP destination second, both here
// and in emit_sse_operand below. NOTE(review): emit_rex_64 presumably sets
// REX.W, which is what separates pextrq from pextrd (same 0x16 opcode).
emit_rex_64(src, dst);
emit(0x0F);
emit(0x3A);
emit(0x16);
emit_sse_operand(src, dst);
emit(imm8);
}
// Emits a pinsrq instruction that inserts the 64-bit general-purpose register
// |src| into the XMM register |dst|; |imm8| is emitted verbatim as the
// trailing immediate byte (the lane selector).
// SSE4.1 must be enabled on this assembler (checked by the DCHECK below).
// Bytes emitted, in order: 0x66, REX prefix, 0x0F, 0x3A, 0x22, operand, imm8.
void Assembler::pinsrq(XMMRegister dst, Register src, int8_t imm8) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
emit(0x66);
// NOTE(review): emit_rex_64 presumably sets REX.W, which is what separates
// pinsrq from pinsrd (same 0x22 opcode).
emit_rex_64(dst, src);
emit(0x0F);
emit(0x3A);
emit(0x22);
emit_sse_operand(dst, src);
emit(imm8);
}
// Memory-source overload of pinsrq: inserts the value described by the
// operand |src| into the XMM register |dst|; |imm8| is emitted verbatim as the
// trailing immediate byte (the lane selector).
// SSE4.1 must be enabled on this assembler (checked by the DCHECK below).
// The byte sequence matches the register overload; only the operand encoding
// produced by emit_rex_64 / emit_sse_operand differs.
void Assembler::pinsrq(XMMRegister dst, Operand src, int8_t imm8) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
emit(0x66);
emit_rex_64(dst, src);
emit(0x0F);
emit(0x3A);
emit(0x22);
emit_sse_operand(dst, src);
emit(imm8);
}
void Assembler::pinsrd(XMMRegister dst, Register src, int8_t imm8) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
......
......@@ -1069,12 +1069,15 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void pextrw(Operand dst, XMMRegister src, int8_t imm8);
void pextrd(Register dst, XMMRegister src, int8_t imm8);
void pextrd(Operand dst, XMMRegister src, int8_t imm8);
void pextrq(Register dst, XMMRegister src, int8_t imm8);
void pinsrb(XMMRegister dst, Register src, int8_t imm8);
void pinsrb(XMMRegister dst, Operand src, int8_t imm8);
void pinsrw(XMMRegister dst, Register src, int8_t imm8);
void pinsrw(XMMRegister dst, Operand src, int8_t imm8);
void pinsrd(XMMRegister dst, Register src, int8_t imm8);
void pinsrd(XMMRegister dst, Operand src, int8_t imm8);
void pinsrq(XMMRegister dst, Register src, int8_t imm8);
void pinsrq(XMMRegister dst, Operand src, int8_t imm8);
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
......
......@@ -1853,6 +1853,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4Le(node);
case IrOpcode::kI64x2Splat:
return MarkAsSimd128(node), VisitI64x2Splat(node);
case IrOpcode::kI64x2ExtractLane:
return MarkAsWord64(node), VisitI64x2ExtractLane(node);
case IrOpcode::kI64x2ReplaceLane:
return MarkAsSimd128(node), VisitI64x2ReplaceLane(node);
case IrOpcode::kI32x4Splat:
return MarkAsSimd128(node), VisitI32x4Splat(node);
case IrOpcode::kI32x4ExtractLane:
......@@ -2499,6 +2503,8 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
#if !V8_TARGET_ARCH_X64
// The 64x2 SIMD operations below are only compiled out for x64; on every
// other target architecture the visitor is a stub that calls UNIMPLEMENTED().
void InstructionSelector::VisitF64x2Splat(Node* node) {
  UNIMPLEMENTED();
}

void InstructionSelector::VisitI64x2Splat(Node* node) {
  UNIMPLEMENTED();
}

void InstructionSelector::VisitI64x2ExtractLane(Node* node) {
  UNIMPLEMENTED();
}

void InstructionSelector::VisitI64x2ReplaceLane(Node* node) {
  UNIMPLEMENTED();
}
#endif  // !V8_TARGET_ARCH_X64
// FinishRegion nodes are selected by handing them straight to EmitIdentity.
void InstructionSelector::VisitFinishRegion(Node* node) {
  EmitIdentity(node);
}
......
......@@ -2419,6 +2419,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ pshufd(dst, dst, 0x44);
break;
}
case kX64I64x2ExtractLane: {
// pextrq DCHECKs that SSE4.1 is enabled on the assembler, so open the
// feature scope before emitting it.
CpuFeatureScope sse_scope(tasm(), SSE4_1);
// Input 0 is the source vector; input 1 is the lane index, read as an 8-bit
// immediate. The extracted lane lands in the (general-purpose) output register.
__ pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kX64I64x2ReplaceLane: {
  // pinsrq DCHECKs that SSE4.1 is enabled on the assembler, so open the
  // feature scope before emitting it.
  CpuFeatureScope sse_scope(tasm(), SSE4_1);
  // Input 0 is never read here, so the output register presumably aliases
  // the vector being modified (TODO confirm in the instruction selector).
  XMMRegister dst = i.OutputSimd128Register();
  // Input 1 is the lane index, read as an 8-bit immediate.
  const auto lane = i.InputInt8(1);
  // Input 2 is the replacement value: either a register or another kind of
  // operand, each with its own pinsrq overload.
  if (!instr->InputAt(2)->IsRegister()) {
    __ pinsrq(dst, i.InputOperand(2), lane);
  } else {
    __ pinsrq(dst, i.InputRegister(2), lane);
  }
  break;
}
case kX64I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsRegister()) {
......
......@@ -180,6 +180,8 @@ namespace compiler {
V(X64F32x4Le) \
V(X64I64x2Splat) \
V(X64I32x4Splat) \
V(X64I64x2ExtractLane) \
V(X64I64x2ReplaceLane) \
V(X64I32x4ExtractLane) \
V(X64I32x4ReplaceLane) \
V(X64I32x4SConvertF32x4) \
......
......@@ -146,6 +146,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Le:
case kX64I64x2Splat:
case kX64I32x4Splat:
case kX64I64x2ExtractLane:
case kX64I64x2ReplaceLane:
case kX64I32x4ExtractLane:
case kX64I32x4ReplaceLane:
case kX64I32x4SConvertF32x4:
......
......@@ -2547,6 +2547,7 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_TYPES(V) \
V(F32x4) \
V(I64x2) \
V(I32x4) \
V(I16x8) \
V(I8x16)
......@@ -2669,7 +2670,6 @@ void InstructionSelector::VisitS128Zero(Node* node) {
}
SIMD_TYPES(VISIT_SIMD_SPLAT)
VISIT_SIMD_SPLAT(F64x2)
VISIT_SIMD_SPLAT(I64x2)
#undef VISIT_SIMD_SPLAT
#define VISIT_SIMD_EXTRACT_LANE(Type) \
......
......@@ -443,6 +443,7 @@ MachineType AtomicOpType(Operator const* op) {
#define SIMD_LANE_OP_LIST(V) \
V(F32x4, 4) \
V(I64x2, 2) \
V(I32x4, 4) \
V(I16x8, 8) \
V(I8x16, 16)
......
......@@ -491,6 +491,9 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F32x4Le();
const Operator* I64x2Splat();
const Operator* I64x2ExtractLane(int32_t);
const Operator* I64x2ReplaceLane(int32_t);
const Operator* I32x4Splat();
const Operator* I32x4ExtractLane(int32_t);
const Operator* I32x4ReplaceLane(int32_t);
......
......@@ -754,6 +754,8 @@
V(F32x4Gt) \
V(F32x4Ge) \
V(I64x2Splat) \
V(I64x2ExtractLane) \
V(I64x2ReplaceLane) \
V(I32x4Splat) \
V(I32x4ExtractLane) \
V(I32x4ReplaceLane) \
......
......@@ -4337,6 +4337,12 @@ Node* WasmGraphBuilder::SimdLaneOp(wasm::WasmOpcode opcode, uint8_t lane,
case wasm::kExprF32x4ReplaceLane:
return graph()->NewNode(mcgraph()->machine()->F32x4ReplaceLane(lane),
inputs[0], inputs[1]);
// I64x2 extract_lane: the lane index is baked into the machine operator and
// the vector is the node's only value input.
case wasm::kExprI64x2ExtractLane:
return graph()->NewNode(mcgraph()->machine()->I64x2ExtractLane(lane),
inputs[0]);
// I64x2 replace_lane: the lane index is baked into the machine operator;
// the node takes two value inputs, inputs[0] and inputs[1].
case wasm::kExprI64x2ReplaceLane:
return graph()->NewNode(mcgraph()->machine()->I64x2ReplaceLane(lane),
inputs[0], inputs[1]);
case wasm::kExprI32x4ExtractLane:
return graph()->NewNode(mcgraph()->machine()->I32x4ExtractLane(lane),
inputs[0]);
......
......@@ -1715,13 +1715,14 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
current += 1;
} else if (third_byte == 0x16) {
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("pextrd "); // reg/m32, xmm, imm8
// reg/m32/reg/m64, xmm, imm8
AppendToBuffer("pextr%c ", rex_w() ? 'q' : 'd');
current += PrintRightOperand(current);
AppendToBuffer(",%s,%d", NameOfXMMRegister(regop), (*current) & 3);
current += 1;
} else if (third_byte == 0x20) {
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("pinsrd "); // xmm, reg/m32, imm8
AppendToBuffer("pinsrb "); // xmm, reg/m32, imm8
AppendToBuffer(" %s,", NameOfXMMRegister(regop));
current += PrintRightOperand(current);
AppendToBuffer(",%d", (*current) & 3);
......@@ -1735,7 +1736,8 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
current += 1;
} else if (third_byte == 0x22) {
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("pinsrd "); // xmm, reg/m32, imm8
// xmm, reg/m32/reg/m64, imm8
AppendToBuffer("pinsr%c ", rex_w() ? 'q' : 'd');
AppendToBuffer(" %s,", NameOfXMMRegister(regop));
current += PrintRightOperand(current);
AppendToBuffer(",%d", (*current) & 3);
......
......@@ -1050,6 +1050,10 @@ class WasmDecoder : public Decoder {
SimdLaneImmediate<validate>& imm) {
uint8_t num_lanes = 0;
switch (opcode) {
// An I64x2 vector has two lanes.
// NOTE(review): num_lanes is presumably used further down to bound the lane
// immediate; that check is outside this hunk, so confirm there.
case kExprI64x2ExtractLane:
case kExprI64x2ReplaceLane:
num_lanes = 2;
break;
case kExprF32x4ExtractLane:
case kExprF32x4ReplaceLane:
case kExprI32x4ExtractLane:
......@@ -2679,6 +2683,10 @@ class WasmFullDecoder : public WasmDecoder<validate> {
len = SimdExtractLane(opcode, kWasmF32);
break;
}
case kExprI64x2ExtractLane: {
len = SimdExtractLane(opcode, kWasmI64);
break;
}
case kExprI32x4ExtractLane:
case kExprI16x8ExtractLane:
case kExprI8x16ExtractLane: {
......@@ -2689,6 +2697,10 @@ class WasmFullDecoder : public WasmDecoder<validate> {
len = SimdReplaceLane(opcode, kWasmF32);
break;
}
case kExprI64x2ReplaceLane: {
len = SimdReplaceLane(opcode, kWasmI64);
break;
}
case kExprI32x4ReplaceLane:
case kExprI16x8ReplaceLane:
case kExprI8x16ReplaceLane: {
......
......@@ -2151,6 +2151,7 @@ class ThreadImpl {
Push(WasmValue(ss.val[LANE(imm.lane, ss)])); \
return true; \
}
EXTRACT_LANE_CASE(I64x2, i64x2)
EXTRACT_LANE_CASE(I32x4, i32x4)
EXTRACT_LANE_CASE(F32x4, f32x4)
EXTRACT_LANE_CASE(I16x8, i16x8)
......@@ -2314,6 +2315,7 @@ class ThreadImpl {
return true; \
}
REPLACE_LANE_CASE(F32x4, f32x4, float4, float)
REPLACE_LANE_CASE(I64x2, i64x2, int2, int64_t)
REPLACE_LANE_CASE(I32x4, i32x4, int4, int32_t)
REPLACE_LANE_CASE(I16x8, i16x8, int8, int32_t)
REPLACE_LANE_CASE(I8x16, i8x16, int16, int32_t)
......
......@@ -245,6 +245,8 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_CONVERT_OP(Convert, I8x16, I16x8, "i32", "convert")
CASE_F32x4_OP(ExtractLane, "extract_lane")
CASE_F32x4_OP(ReplaceLane, "replace_lane")
CASE_I64x2_OP(ExtractLane, "extract_lane")
CASE_I64x2_OP(ReplaceLane, "replace_lane")
CASE_SIMDI_OP(ExtractLane, "extract_lane")
CASE_SIMDI_OP(ReplaceLane, "replace_lane")
CASE_SIGN_OP(SIMDI, Min, "min")
......
......@@ -387,6 +387,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(I8x16ExtractLane, 0xfd05, _) \
V(I16x8ExtractLane, 0xfd09, _) \
V(I32x4ExtractLane, 0xfd0d, _) \
V(I64x2ExtractLane, 0xfd10, _) \
V(F32x4ExtractLane, 0xfd13, _) \
V(I8x16Shl, 0xfd54, _) \
V(I8x16ShrS, 0xfd55, _) \
......@@ -402,6 +403,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(I8x16ReplaceLane, 0xfd07, _) \
V(I16x8ReplaceLane, 0xfd0b, _) \
V(I32x4ReplaceLane, 0xfd0e, _) \
V(I64x2ReplaceLane, 0xfd11, _) \
V(F32x4ReplaceLane, 0xfd14, _)
#define FOREACH_SIMD_1_OPERAND_OPCODE(V) \
......
......@@ -307,10 +307,12 @@ void PrintWasmText(const WasmModule* module, const ModuleWireBytes& wire_bytes,
case kExprI8x16ExtractLane:
case kExprI16x8ExtractLane:
case kExprI32x4ExtractLane:
case kExprI64x2ExtractLane:
case kExprF32x4ExtractLane:
case kExprI8x16ReplaceLane:
case kExprI16x8ReplaceLane:
case kExprI32x4ReplaceLane:
case kExprI64x2ReplaceLane:
case kExprF32x4ReplaceLane: {
SimdLaneImmediate<Decoder::kNoValidate> imm(&i, i.pc());
os << WasmOpcodes::OpcodeName(opcode) << ' ' << imm.lane;
......
......@@ -542,8 +542,11 @@ TEST(DisasmX64) {
__ pinsrw(xmm2, rcx, 1);
__ pextrd(rbx, xmm15, 0);
__ pextrd(r12, xmm0, 1);
__ pextrq(r12, xmm0, 1);
__ pinsrd(xmm9, r9, 0);
__ pinsrd(xmm5, Operand(rax, 4), 1);
__ pinsrq(xmm9, r9, 0);
__ pinsrq(xmm5, Operand(rax, 4), 1);
__ pblendw(xmm5, xmm1, 1);
__ pblendw(xmm9, Operand(rax, 4), 1);
......
......@@ -291,6 +291,11 @@ T Sqrt(T a) {
x, y, WASM_SIMD_OP(kExprF32x4ReplaceLane), TO_BYTE(lane)
#define WASM_SIMD_I64x2_SPLAT(x) WASM_SIMD_SPLAT(I64x2, x)
#define WASM_SIMD_I64x2_EXTRACT_LANE(lane, x) \
x, WASM_SIMD_OP(kExprI64x2ExtractLane), TO_BYTE(lane)
#define WASM_SIMD_I64x2_REPLACE_LANE(lane, x, y) \
x, y, WASM_SIMD_OP(kExprI64x2ReplaceLane), TO_BYTE(lane)
#define WASM_SIMD_I32x4_SPLAT(x) WASM_SIMD_SPLAT(I32x4, x)
#define WASM_SIMD_I32x4_EXTRACT_LANE(lane, x) \
x, WASM_SIMD_OP(kExprI32x4ExtractLane), TO_BYTE(lane)
......@@ -724,6 +729,35 @@ WASM_SIMD_TEST(I64x2Splat) {
}
}
}
// Checks that extracting an i64 lane from a vector built by an F64x2 splat
// yields the raw bit pattern of the double, with no numeric conversion.
WASM_SIMD_TEST_NO_LOWERING(I64x2ExtractWithF64x2) {
WasmRunner<int64_t> r(execution_tier, lower_simd);
// The function splats the double 1e15 into an F64x2 vector, extracts lane 0
// as an i64 and compares it with the i64 reinterpretation of the same double.
// It returns 1 when the two are equal and 0 otherwise.
BUILD(r, WASM_IF_ELSE_L(
WASM_I64_EQ(WASM_SIMD_I64x2_EXTRACT_LANE(
0, WASM_SIMD_F64x2_SPLAT(WASM_F64(1e15))),
WASM_I64_REINTERPRET_F64(WASM_F64(1e15))),
WASM_I64V(1), WASM_I64V(0)));
CHECK_EQ(1, r.Call());
}
// Checks I64x2 replace_lane by overwriting both lanes of a splatted vector and
// reading the result back from a global.
WASM_SIMD_TEST_NO_LOWERING(I64x2ReplaceLane) {
WasmRunner<int32_t> r(execution_tier, lower_simd);
// Set up an S128 global that receives the final vector; the host reads it
// back below as two int64_t values.
int64_t* g = r.builder().AddGlobal<int64_t>(kWasmS128);
// Build a function that replaces each lane with its index: start from a
// vector with -1 in both lanes, write 0 into lane 0, then write 1 into lane 1
// and store the result in global 0. The function's own return value
// (WASM_ONE) is not checked.
byte temp1 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_SET_LOCAL(temp1, WASM_SIMD_I64x2_SPLAT(WASM_I64V(-1))),
WASM_SET_LOCAL(temp1, WASM_SIMD_I64x2_REPLACE_LANE(
0, WASM_GET_LOCAL(temp1), WASM_I64V(0))),
WASM_SET_GLOBAL(0, WASM_SIMD_I64x2_REPLACE_LANE(
1, WASM_GET_LOCAL(temp1), WASM_I64V(1))),
WASM_ONE);
r.Call();
// Lane i of the stored vector must now hold the value i.
for (int64_t i = 0; i < 2; i++) {
CHECK_EQ(i, ReadLittleEndianValue<int64_t>(&g[i]));
}
}
#endif // V8_TARGET_ARCH_X64
WASM_SIMD_TEST(I32x4Splat) {
......@@ -2471,9 +2505,13 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8GtUMixed) {
#undef WASM_SIMD_SHIFT_OP
#undef WASM_SIMD_CONCAT_OP
#undef WASM_SIMD_SELECT
#undef WASM_SIMD_F64x2_SPLAT
#undef WASM_SIMD_F32x4_SPLAT
#undef WASM_SIMD_F32x4_EXTRACT_LANE
#undef WASM_SIMD_F32x4_REPLACE_LANE
#undef WASM_SIMD_I64x2_SPLAT
#undef WASM_SIMD_I64x2_EXTRACT_LANE
#undef WASM_SIMD_I64x2_REPLACE_LANE
#undef WASM_SIMD_I32x4_SPLAT
#undef WASM_SIMD_I32x4_EXTRACT_LANE
#undef WASM_SIMD_I32x4_REPLACE_LANE
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment