Commit 009993ad authored by Ng Zhi An, committed by Commit Bot

[liftoff][wasm-simd] Implement f32x4.splat

Implement f32x4.splat and enable handling this in Liftoff.

We add a new macro for defining test cases to run on TurboFan, Liftoff,
interpreter, and scalar lowering.

Also add an assertion that the execution tier used is what we expected
it to be. This is useful for Liftoff, because by default it falls back
to TurboFan when it encounters an unimplemented opcode.

Bug: v8:9909
Change-Id: I594955fce778173191fc44c38c4f956a05e77839
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2014753
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65954}
parent 0a97c3f3
...@@ -1507,6 +1507,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1507,6 +1507,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
} }
} }
// ARM: duplicates lane 0 of |src|'s FP register into every 32-bit lane of the
// SIMD128 register derived from |dst|.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // |dst| is addressed through its low FP half and converted by the liftoff
  // helper into the register form that vdup expects.
  const auto splat_target = liftoff::GetSimd128Register(dst.low_fp());
  // Index of the source lane that gets replicated.
  constexpr int kSourceLane = 0;
  vdup(Neon32, splat_target, src.fp(), kSourceLane);
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
ldr(limit_address, MemOperand(limit_address)); ldr(limit_address, MemOperand(limit_address));
cmp(sp, limit_address); cmp(sp, limit_address);
......
...@@ -1045,6 +1045,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1045,6 +1045,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
} }
} }
// ARM64: duplicates element 0 of |src| (viewed as S) into |dst| (viewed as
// 4S).
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  const auto four_lanes = dst.fp().V4S();
  const auto scalar_lane = src.fp().S();
  // The trailing 0 selects which element of |scalar_lane| is duplicated.
  Dup(four_lanes, scalar_lane, 0);
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
Ldr(limit_address, MemOperand(limit_address)); Ldr(limit_address, MemOperand(limit_address));
Cmp(sp, limit_address); Cmp(sp, limit_address);
......
...@@ -1888,6 +1888,16 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1888,6 +1888,16 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs); liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs);
} }
// ia32: splats the f32 in |src| into all four 32-bit lanes of |dst|.
//
// With AVX, the three-operand vshufps reads both shuffle inputs from |src|, so
// |dst| may be any register. Legacy SSE shufps is destructive: the two low
// result lanes are selected from the destination operand and only the two high
// lanes from the source operand. |dst| must therefore already hold the scalar
// in lane 0 before the shuffle; otherwise lanes 0 and 1 would be filled with
// whatever |dst| previously contained.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vshufps(dst.fp(), src.fp(), src.fp(), 0);
  } else {
    // Seed lane 0 of |dst| with the scalar (unnecessary when they alias).
    if (dst.fp() != src.fp()) {
      movss(dst.fp(), src.fp());
    }
    shufps(dst.fp(), src.fp(), 0);
  }
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
cmp(esp, Operand(limit_address, 0)); cmp(esp, Operand(limit_address, 0));
j(below_equal, ool_code); j(below_equal, ool_code);
......
...@@ -663,6 +663,8 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -663,6 +663,8 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_f64_set_cond(Condition condition, Register dst, inline void emit_f64_set_cond(Condition condition, Register dst,
DoubleRegister lhs, DoubleRegister rhs); DoubleRegister lhs, DoubleRegister rhs);
// Emits code for f32x4.splat: the f32 value in |src| is replicated into the
// lanes of the SIMD value in |dst|. Implemented per architecture.
inline void emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src);
inline void StackCheck(Label* ool_code, Register limit_address); inline void StackCheck(Label* ool_code, Register limit_address);
inline void CallTrapCallbackForTesting(); inline void CallTrapCallbackForTesting();
......
...@@ -2177,10 +2177,21 @@ class LiftoffCompiler { ...@@ -2177,10 +2177,21 @@ class LiftoffCompiler {
const Value args[]) { const Value args[]) {
unsupported(decoder, kTailCall, "return_call_indirect"); unsupported(decoder, kTailCall, "return_call_indirect");
} }
// Dispatches on |opcode|. f32x4.splat is compiled through EmitUnOp with a
// lambda that forwards to the assembler's emit_f32x4_splat; every other SIMD
// opcode is reported through unsupported().
void SimdOp(FullDecoder* decoder, WasmOpcode opcode, Vector<Value> args, void SimdOp(FullDecoder* decoder, WasmOpcode opcode, Vector<Value> args,
Value* result) { Value* result) {
unsupported(decoder, kSimd, "simd"); switch (opcode) {
// Unary op taking a kWasmF32 input and producing a kWasmS128 result.
case wasm::kExprF32x4Splat:
EmitUnOp<kWasmF32, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
__ emit_f32x4_splat(dst, src);
});
break;
// Anything not handled above is flagged as an unsupported SIMD operation.
default:
unsupported(decoder, kSimd, "simd");
}
} }
void SimdLaneOp(FullDecoder* decoder, WasmOpcode opcode, void SimdLaneOp(FullDecoder* decoder, WasmOpcode opcode,
const SimdLaneImmediate<validate>& imm, const SimdLaneImmediate<validate>& imm,
const Vector<Value> inputs, Value* result) { const Vector<Value> inputs, Value* result) {
......
...@@ -1609,6 +1609,14 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1609,6 +1609,14 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
rhs); rhs);
} }
// x64: copies the scalar from |src| into |dst| when they are different
// registers, then shuffles with selector 0 so that every lane is picked from
// lane 0 of its input operand.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  const auto result_reg = dst.fp();
  const auto scalar_reg = src.fp();
  const bool needs_copy = result_reg != scalar_reg;
  if (needs_copy) Movss(result_reg, scalar_reg);
  Shufps(result_reg, scalar_reg, static_cast<byte>(0));
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
cmpq(rsp, Operand(limit_address, 0)); cmpq(rsp, Operand(limit_address, 0));
j(below_equal, ool_code); j(below_equal, ool_code);
......
...@@ -58,6 +58,27 @@ using Int8ShiftOp = int8_t (*)(int8_t, int); ...@@ -58,6 +58,27 @@ using Int8ShiftOp = int8_t (*)(int8_t, int);
} \ } \
void RunWasm_##name##_Impl(LowerSimd lower_simd, ExecutionTier execution_tier) void RunWasm_##name##_Impl(LowerSimd lower_simd, ExecutionTier execution_tier)
// Defines a SIMD test |name| that also runs on Liftoff. The expansion
// forward-declares RunWasm_<name>_Impl and defines four TESTs, each of which
// opens EXPERIMENTAL_FLAG_SCOPE(simd) and calls the impl with:
//   RunWasm_<name>_turbofan:     kNoLowerSimd, ExecutionTier::kTurbofan
//   RunWasm_<name>_liftoff:      kNoLowerSimd, ExecutionTier::kLiftoff
//   RunWasm_<name>_interpreter:  kNoLowerSimd, ExecutionTier::kInterpreter
//   RunWasm_<name>_simd_lowered: kLowerSimd,   ExecutionTier::kTurbofan
// The expansion ends with the signature of RunWasm_<name>_Impl, so the braces
// written after the macro invocation become that function's body, with
// |lower_simd| and |execution_tier| available as parameters.
#define WASM_SIMD_TEST_WITH_LIFTOFF(name) \
void RunWasm_##name##_Impl(LowerSimd lower_simd, \
ExecutionTier execution_tier); \
TEST(RunWasm_##name##_turbofan) { \
EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kTurbofan); \
} \
TEST(RunWasm_##name##_liftoff) { \
EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kLiftoff); \
} \
TEST(RunWasm_##name##_interpreter) { \
EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kInterpreter); \
} \
TEST(RunWasm_##name##_simd_lowered) { \
EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kLowerSimd, ExecutionTier::kTurbofan); \
} \
void RunWasm_##name##_Impl(LowerSimd lower_simd, ExecutionTier execution_tier)
// Generic expected value functions. // Generic expected value functions.
template <typename T, typename = typename std::enable_if< template <typename T, typename = typename std::enable_if<
std::is_floating_point<T>::value>::type> std::is_floating_point<T>::value>::type>
...@@ -544,13 +565,14 @@ WASM_SIMD_TEST(S128Globals) { ...@@ -544,13 +565,14 @@ WASM_SIMD_TEST(S128Globals) {
} }
} }
WASM_SIMD_TEST(F32x4Splat) { WASM_SIMD_TEST_WITH_LIFTOFF(F32x4Splat) {
WasmRunner<int32_t, float> r(execution_tier, lower_simd); WasmRunner<int32_t, float> r(execution_tier, lower_simd);
// Set up a global to hold output vector. // Set up a global to hold output vector.
float* g = r.builder().AddGlobal<float>(kWasmS128); float* g = r.builder().AddGlobal<float>(kWasmS128);
byte param1 = 0; byte param1 = 0;
BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_F32x4_SPLAT(WASM_GET_LOCAL(param1))), BUILD_AND_CHECK_TIER(
WASM_ONE); r, WASM_SET_GLOBAL(0, WASM_SIMD_F32x4_SPLAT(WASM_GET_LOCAL(param1))),
WASM_ONE);
FOR_FLOAT32_INPUTS(x) { FOR_FLOAT32_INPUTS(x) {
r.Call(x); r.Call(x);
...@@ -3569,6 +3591,7 @@ WASM_EXTRACT_I16x8_TEST(S, UINT16) WASM_EXTRACT_I16x8_TEST(I, INT16) ...@@ -3569,6 +3591,7 @@ WASM_EXTRACT_I16x8_TEST(S, UINT16) WASM_EXTRACT_I16x8_TEST(I, INT16)
#undef WASM_EXTRACT_I8x16_TEST #undef WASM_EXTRACT_I8x16_TEST
#undef WASM_SIMD_TEST #undef WASM_SIMD_TEST
#undef WASM_SIMD_TEST_WITH_LIFTOFF
#undef WASM_SIMD_CHECK_LANE_S #undef WASM_SIMD_CHECK_LANE_S
#undef WASM_SIMD_CHECK_LANE_U #undef WASM_SIMD_CHECK_LANE_U
#undef TO_BYTE #undef TO_BYTE
......
...@@ -75,6 +75,13 @@ using compiler::Node; ...@@ -75,6 +75,13 @@ using compiler::Node;
r.Build(code, code + arraysize(code)); \ r.Build(code, code + arraysize(code)); \
} while (false) } while (false)
// Builds the function body of runner |r| from the given bytes, then calls
// r.CheckUsedExecutionTier(execution_tier).
// NOTE: |execution_tier| is not a macro parameter; it is resolved in the scope
// where the macro is expanded, so a variable of that name must be visible
// there (e.g. the parameter of a RunWasm_<name>_Impl test body).
// Presumably intended to catch a silent tier fallback, such as Liftoff
// bailing out to TurboFan on an unimplemented opcode.
#define BUILD_AND_CHECK_TIER(r, ...) \
do { \
byte code[] = {__VA_ARGS__}; \
r.Build(code, code + arraysize(code)); \
r.CheckUsedExecutionTier(execution_tier); \
} while (false)
// For tests that must manually import a JSFunction with source code. // For tests that must manually import a JSFunction with source code.
struct ManuallyImportedJSFunction { struct ManuallyImportedJSFunction {
FunctionSig* sig; FunctionSig* sig;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment