Commit 9245e3b4 authored by Zhou, Zhiguo; committed by Commit Bot

[wasm-simd][liftoff] Implement splat for i64x2, f64x2, i8x16 on x64 and ia32

Bug: v8:9909
Change-Id: Id89ddb429c90f3f454a581d75b783ff56775b01e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2049247
Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66552}
parent 66251fba
...@@ -1540,11 +1540,21 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1540,11 +1540,21 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
} }
} }
// f64x2 splat is not implemented in this backend variant: dst and src are
// ignored, no instruction is emitted here, and a kSimd bailout tagged
// "f64x2splat" is reported instead.
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2splat");
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.fp(), 0); vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.fp(), 0);
} }
// i64x2 splat is not implemented in this backend variant: dst and src are
// ignored, no instruction is emitted here, and a kSimd bailout tagged
// "i64x2splat" is reported instead.
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2splat");
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.gp()); vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.gp());
...@@ -1555,6 +1565,11 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst, ...@@ -1555,6 +1565,11 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
vdup(Neon16, liftoff::GetSimd128Register(dst.low_fp()), src.gp()); vdup(Neon16, liftoff::GetSimd128Register(dst.low_fp()), src.gp());
} }
// i8x16 splat is not implemented in this backend variant: dst and src are
// ignored, no instruction is emitted here, and a kSimd bailout tagged
// "i8x16splat" is reported instead.
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i8x16splat");
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
ldr(limit_address, MemOperand(limit_address)); ldr(limit_address, MemOperand(limit_address));
cmp(sp, limit_address); cmp(sp, limit_address);
......
...@@ -1071,11 +1071,21 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1071,11 +1071,21 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
} }
} }
// f64x2 splat is not implemented in this backend variant: dst and src are
// ignored, no instruction is emitted here, and a kSimd bailout tagged
// "f64x2splat" is reported instead.
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "f64x2splat");
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Dup(dst.fp().V4S(), src.fp().S(), 0); Dup(dst.fp().V4S(), src.fp().S(), 0);
} }
// i64x2 splat is not implemented in this backend variant: dst and src are
// ignored, no instruction is emitted here, and a kSimd bailout tagged
// "i64x2splat" is reported instead.
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2splat");
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Dup(dst.fp().V4S(), src.gp().W()); Dup(dst.fp().V4S(), src.gp().W());
...@@ -1086,6 +1096,11 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst, ...@@ -1086,6 +1096,11 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
Dup(dst.fp().V8H(), src.gp().W()); Dup(dst.fp().V8H(), src.gp().W());
} }
// i8x16 splat is not implemented in this backend variant: dst and src are
// ignored, no instruction is emitted here, and a kSimd bailout tagged
// "i8x16splat" is reported instead.
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i8x16splat");
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
Ldr(limit_address, MemOperand(limit_address)); Ldr(limit_address, MemOperand(limit_address));
Cmp(sp, limit_address); Cmp(sp, limit_address);
......
...@@ -1912,6 +1912,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1912,6 +1912,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs); liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs);
} }
// Emits the f64x2 splat: one Movddup from src.fp() into dst.fp().
// NOTE(review): relies on Movddup having MOVDDUP semantics (low 64 bits of the
// source copied into both 64-bit halves of the destination) — confirm against
// the macro-assembler helper.
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movddup(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
...@@ -1925,6 +1930,13 @@ void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, ...@@ -1925,6 +1930,13 @@ void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
} }
} }
// Emits the i64x2 splat for a source held as a register pair
// (src.low_gp() / src.high_gp()); the result is built in dst.fp().
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
// Assemble the 64-bit value in the low half of dst: low word into 32-bit
// lane 0, high word into 32-bit lane 1.
Pinsrd(dst.fp(), src.low_gp(), 0);
Pinsrd(dst.fp(), src.high_gp(), 1);
// 0x44 == 0b01'00'01'00: PSHUFD picks 32-bit lanes {0, 1, 0, 1}, i.e. the low
// 64 bits are repeated in the high 64 bits.
Pshufd(dst.fp(), dst.fp(), 0x44);
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Movd(dst.fp(), src.gp()); Movd(dst.fp(), src.gp());
...@@ -1938,6 +1950,13 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst, ...@@ -1938,6 +1950,13 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
Pshufd(dst.fp(), dst.fp(), 0); Pshufd(dst.fp(), dst.fp(), 0);
} }
// Emits the i8x16 splat from the general-purpose register src.gp() into
// dst.fp(). Overwrites liftoff::kScratchDoubleReg (it is zeroed and used as the
// shuffle mask).
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
// Put the source value in the low lane of dst; only byte 0 is used below.
Movd(dst.fp(), src.gp());
// Build an all-zero shuffle control vector in the scratch register.
Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
// With every control byte equal to 0, PSHUFB selects byte 0 of dst for each
// of the 16 result bytes.
Pshufb(dst.fp(), liftoff::kScratchDoubleReg);
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
cmp(esp, Operand(limit_address, 0)); cmp(esp, Operand(limit_address, 0));
j(below_equal, ool_code); j(below_equal, ool_code);
......
...@@ -681,9 +681,12 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -681,9 +681,12 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_f64_set_cond(Condition condition, Register dst, inline void emit_f64_set_cond(Condition condition, Register dst,
DoubleRegister lhs, DoubleRegister rhs); DoubleRegister lhs, DoubleRegister rhs);
// Platform-specific: emit code for f64x2 splat of src into dst.
inline void emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src);
// Platform-specific: emit code for i64x2 splat of src into dst.
inline void emit_i64x2_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src); inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src);
// Platform-specific: emit code for i8x16 splat of src into dst.
inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
inline void StackCheck(Label* ool_code, Register limit_address); inline void StackCheck(Label* ool_code, Register limit_address);
......
...@@ -2240,12 +2240,24 @@ class LiftoffCompiler { ...@@ -2240,12 +2240,24 @@ class LiftoffCompiler {
return unsupported(decoder, kSimd, "simd"); return unsupported(decoder, kSimd, "simd");
} }
switch (opcode) { switch (opcode) {
case wasm::kExprF64x2Splat:
EmitUnOp<kWasmF64, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
__ emit_f64x2_splat(dst, src);
});
break;
case wasm::kExprF32x4Splat: case wasm::kExprF32x4Splat:
EmitUnOp<kWasmF32, kWasmS128>( EmitUnOp<kWasmF32, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) { [=](LiftoffRegister dst, LiftoffRegister src) {
__ emit_f32x4_splat(dst, src); __ emit_f32x4_splat(dst, src);
}); });
break; break;
case wasm::kExprI64x2Splat:
EmitUnOp<kWasmI64, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
__ emit_i64x2_splat(dst, src);
});
break;
case wasm::kExprI32x4Splat: case wasm::kExprI32x4Splat:
EmitUnOp<kWasmI32, kWasmS128>( EmitUnOp<kWasmI32, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) { [=](LiftoffRegister dst, LiftoffRegister src) {
...@@ -2258,6 +2270,12 @@ class LiftoffCompiler { ...@@ -2258,6 +2270,12 @@ class LiftoffCompiler {
__ emit_i16x8_splat(dst, src); __ emit_i16x8_splat(dst, src);
}); });
break; break;
case wasm::kExprI8x16Splat:
EmitUnOp<kWasmI32, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
__ emit_i8x16_splat(dst, src);
});
break;
default: default:
unsupported(decoder, kSimd, "simd"); unsupported(decoder, kSimd, "simd");
} }
......
...@@ -1774,6 +1774,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1774,6 +1774,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
rhs); rhs);
} }
// Emits the f64x2 splat: one Movddup from src.fp() into dst.fp().
// NOTE(review): relies on Movddup having MOVDDUP semantics (low 64 bits of the
// source copied into both 64-bit halves of the destination) — confirm against
// the macro-assembler helper.
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movddup(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
if (dst.fp() != src.fp()) { if (dst.fp() != src.fp()) {
...@@ -1782,6 +1787,12 @@ void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, ...@@ -1782,6 +1787,12 @@ void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
Shufps(dst.fp(), src.fp(), static_cast<byte>(0)); Shufps(dst.fp(), src.fp(), static_cast<byte>(0));
} }
// Emits the i64x2 splat from the single general-purpose register src.gp() into
// dst.fp().
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
// Move the 64-bit integer into the low half of dst.
Movq(dst.fp(), src.gp());
// Duplicate that low half into the high half (MOVDDUP semantics), in place.
Movddup(dst.fp(), dst.fp());
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Movd(dst.fp(), src.gp()); Movd(dst.fp(), src.gp());
...@@ -1795,6 +1806,13 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst, ...@@ -1795,6 +1806,13 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0)); Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
} }
// Emits the i8x16 splat from the general-purpose register src.gp() into
// dst.fp(). Overwrites kScratchDoubleReg (it is zeroed and used as the shuffle
// mask).
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
// Put the source value in the low lane of dst; only byte 0 is used below.
Movd(dst.fp(), src.gp());
// Build an all-zero shuffle control vector in the scratch register.
Pxor(kScratchDoubleReg, kScratchDoubleReg);
// With every control byte equal to 0, PSHUFB selects byte 0 of dst for each
// of the 16 result bytes.
Pshufb(dst.fp(), kScratchDoubleReg);
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
cmpq(rsp, Operand(limit_address, 0)); cmpq(rsp, Operand(limit_address, 0));
j(below_equal, ool_code); j(below_equal, ool_code);
......
...@@ -509,6 +509,11 @@ bool ExpectFused(ExecutionTier tier) { ...@@ -509,6 +509,11 @@ bool ExpectFused(ExecutionTier tier) {
EXPERIMENTAL_FLAG_SCOPE(simd); \ EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kTurbofan); \ RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kTurbofan); \
} \ } \
TEST(RunWasm_##name##_liftoff) { \
if (!CpuFeatures::SupportsWasmSimd128()) return; \
EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kLiftoff); \
} \
TEST(RunWasm_##name##_interpreter) { \ TEST(RunWasm_##name##_interpreter) { \
EXPERIMENTAL_FLAG_SCOPE(simd); \ EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kInterpreter); \ RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kInterpreter); \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment