Commit b110d480, authored by Ng Zhi An and committed by Commit Bot

Reland "[liftoff][wasm-simd] Implement f32x4.splat"

This relands commit 009993ad.

The fix is in liftoff-assembler-ia32.h; the codegen there was incorrect.

Original change's description:
> Implement f32x4.splat and enable handling this in Liftoff.
>
> We add a new macro for defining test cases to run on TurboFan, Liftoff,
> interpreter, and scalar lowering.
>
> Also add an assertion that the execution tier used is what we expected
> it to be. This is useful for Liftoff, because by default it falls back
> to TurboFan when it encounters an unimplemented opcode.
>
> Bug: v8:9909

Bug: v8:9909
Change-Id: I7daacbe8b195d9212367190c515b0babbc457a88
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2018043
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65963}
parent 867a2040
...@@ -1507,6 +1507,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1507,6 +1507,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
} }
} }
// ARM: f32x4.splat. Fills the four 32-bit lanes of the SIMD register backing
// |dst| from lane 0 of the register holding |src|.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // The destination is addressed through the 128-bit view of dst's low FP
  // register.
  const auto splat_target = liftoff::GetSimd128Register(dst.low_fp());
  const auto scalar_source = src.fp();
  vdup(Neon32, splat_target, scalar_source, 0);
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
ldr(limit_address, MemOperand(limit_address)); ldr(limit_address, MemOperand(limit_address));
cmp(sp, limit_address); cmp(sp, limit_address);
......
...@@ -1045,6 +1045,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1045,6 +1045,11 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
} }
} }
// ARM64: f32x4.splat. Duplicates element 0 of the single-precision view of
// |src| into the 4S (four 32-bit lanes) view of |dst|.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  const auto dst_lanes = dst.fp().V4S();
  const auto src_scalar = src.fp().S();
  Dup(dst_lanes, src_scalar, 0);
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
Ldr(limit_address, MemOperand(limit_address)); Ldr(limit_address, MemOperand(limit_address));
Cmp(sp, limit_address); Cmp(sp, limit_address);
......
...@@ -1888,6 +1888,19 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1888,6 +1888,19 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs); liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs);
} }
// ia32: f32x4.splat. Broadcasts the low float of |src| into all four lanes of
// |dst|, using the three-operand AVX shuffle when AVX is available and the
// two-operand SSE sequence otherwise.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  const auto dst_reg = dst.fp();
  const auto src_reg = src.fp();

  if (!CpuFeatures::IsSupported(AVX)) {
    // SSE shufps takes its lower result lanes from the destination operand,
    // so the scalar has to be present in dst before shuffling.
    if (dst_reg != src_reg) movss(dst_reg, src_reg);
    shufps(dst_reg, src_reg, 0);
    return;
  }

  // AVX form reads both shuffle inputs from src, so no prior move is needed.
  CpuFeatureScope scope(this, AVX);
  vshufps(dst_reg, src_reg, src_reg, 0);
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
cmp(esp, Operand(limit_address, 0)); cmp(esp, Operand(limit_address, 0));
j(below_equal, ool_code); j(below_equal, ool_code);
......
...@@ -663,6 +663,8 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -663,6 +663,8 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_f64_set_cond(Condition condition, Register dst, inline void emit_f64_set_cond(Condition condition, Register dst,
DoubleRegister lhs, DoubleRegister rhs); DoubleRegister lhs, DoubleRegister rhs);
inline void emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src);
inline void StackCheck(Label* ool_code, Register limit_address); inline void StackCheck(Label* ool_code, Register limit_address);
inline void CallTrapCallbackForTesting(); inline void CallTrapCallbackForTesting();
......
...@@ -2177,10 +2177,21 @@ class LiftoffCompiler { ...@@ -2177,10 +2177,21 @@ class LiftoffCompiler {
const Value args[]) { const Value args[]) {
unsupported(decoder, kTailCall, "return_call_indirect"); unsupported(decoder, kTailCall, "return_call_indirect");
} }
// Handles a SIMD opcode. kExprF32x4Splat is emitted via EmitUnOp using
// emit_f32x4_splat; every other opcode is reported through
// unsupported(decoder, kSimd, "simd").
// NOTE(review): the first three lines below show the pre-change and
// post-change text of the diff side by side on the same line.
void SimdOp(FullDecoder* decoder, WasmOpcode opcode, Vector<Value> args, void SimdOp(FullDecoder* decoder, WasmOpcode opcode, Vector<Value> args,
Value* result) { Value* result) {
unsupported(decoder, kSimd, "simd"); switch (opcode) {
// Unary op: one kWasmF32 input, one kWasmS128 result.
case wasm::kExprF32x4Splat:
EmitUnOp<kWasmF32, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
__ emit_f32x4_splat(dst, src);
});
break;
// Any opcode without a case above is flagged as unsupported.
default:
unsupported(decoder, kSimd, "simd");
}
} }
void SimdLaneOp(FullDecoder* decoder, WasmOpcode opcode, void SimdLaneOp(FullDecoder* decoder, WasmOpcode opcode,
const SimdLaneImmediate<validate>& imm, const SimdLaneImmediate<validate>& imm,
const Vector<Value> inputs, Value* result) { const Vector<Value> inputs, Value* result) {
......
...@@ -1609,6 +1609,14 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst, ...@@ -1609,6 +1609,14 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
rhs); rhs);
} }
// x64: f32x4.splat. Broadcasts the low float of |src| into all four lanes of
// |dst|.
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  // Copy the scalar into dst first unless both operands already share a
  // register; the shuffle below then runs with dst as its first operand.
  const bool same_register = dst.fp() == src.fp();
  if (!same_register) {
    Movss(dst.fp(), src.fp());
  }
  // Shuffle immediate 0; the cast passes the immediate explicitly as a byte.
  Shufps(dst.fp(), src.fp(), static_cast<byte>(0));
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
cmpq(rsp, Operand(limit_address, 0)); cmpq(rsp, Operand(limit_address, 0));
j(below_equal, ool_code); j(below_equal, ool_code);
......
...@@ -58,6 +58,27 @@ using Int8ShiftOp = int8_t (*)(int8_t, int); ...@@ -58,6 +58,27 @@ using Int8ShiftOp = int8_t (*)(int8_t, int);
} \ } \
void RunWasm_##name##_Impl(LowerSimd lower_simd, ExecutionTier execution_tier) void RunWasm_##name##_Impl(LowerSimd lower_simd, ExecutionTier execution_tier)
// Defines a SIMD test named |name| that is run in four configurations, each
// under EXPERIMENTAL_FLAG_SCOPE(simd):
//   RunWasm_<name>_turbofan      - kNoLowerSimd, ExecutionTier::kTurbofan
//   RunWasm_<name>_liftoff       - kNoLowerSimd, ExecutionTier::kLiftoff
//   RunWasm_<name>_interpreter   - kNoLowerSimd, ExecutionTier::kInterpreter
//   RunWasm_<name>_simd_lowered  - kLowerSimd,   ExecutionTier::kTurbofan
// The macro forward-declares RunWasm_<name>_Impl, emits the four TEST
// wrappers that call it, and ends with the Impl function header so that the
// test body written after the macro invocation becomes its definition. The
// body can use the |lower_simd| and |execution_tier| parameters.
#define WASM_SIMD_TEST_WITH_LIFTOFF(name) \
void RunWasm_##name##_Impl(LowerSimd lower_simd, \
ExecutionTier execution_tier); \
TEST(RunWasm_##name##_turbofan) { \
EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kTurbofan); \
} \
TEST(RunWasm_##name##_liftoff) { \
EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kLiftoff); \
} \
TEST(RunWasm_##name##_interpreter) { \
EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kNoLowerSimd, ExecutionTier::kInterpreter); \
} \
TEST(RunWasm_##name##_simd_lowered) { \
EXPERIMENTAL_FLAG_SCOPE(simd); \
RunWasm_##name##_Impl(kLowerSimd, ExecutionTier::kTurbofan); \
} \
void RunWasm_##name##_Impl(LowerSimd lower_simd, ExecutionTier execution_tier)
// Generic expected value functions. // Generic expected value functions.
template <typename T, typename = typename std::enable_if< template <typename T, typename = typename std::enable_if<
std::is_floating_point<T>::value>::type> std::is_floating_point<T>::value>::type>
...@@ -544,13 +565,14 @@ WASM_SIMD_TEST(S128Globals) { ...@@ -544,13 +565,14 @@ WASM_SIMD_TEST(S128Globals) {
} }
} }
WASM_SIMD_TEST(F32x4Splat) { WASM_SIMD_TEST_WITH_LIFTOFF(F32x4Splat) {
WasmRunner<int32_t, float> r(execution_tier, lower_simd); WasmRunner<int32_t, float> r(execution_tier, lower_simd);
// Set up a global to hold output vector. // Set up a global to hold output vector.
float* g = r.builder().AddGlobal<float>(kWasmS128); float* g = r.builder().AddGlobal<float>(kWasmS128);
byte param1 = 0; byte param1 = 0;
BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_F32x4_SPLAT(WASM_GET_LOCAL(param1))), BUILD_AND_CHECK_TIER(
WASM_ONE); r, WASM_SET_GLOBAL(0, WASM_SIMD_F32x4_SPLAT(WASM_GET_LOCAL(param1))),
WASM_ONE);
FOR_FLOAT32_INPUTS(x) { FOR_FLOAT32_INPUTS(x) {
r.Call(x); r.Call(x);
...@@ -3569,6 +3591,7 @@ WASM_EXTRACT_I16x8_TEST(S, UINT16) WASM_EXTRACT_I16x8_TEST(I, INT16) ...@@ -3569,6 +3591,7 @@ WASM_EXTRACT_I16x8_TEST(S, UINT16) WASM_EXTRACT_I16x8_TEST(I, INT16)
#undef WASM_EXTRACT_I8x16_TEST #undef WASM_EXTRACT_I8x16_TEST
#undef WASM_SIMD_TEST #undef WASM_SIMD_TEST
#undef WASM_SIMD_TEST_WITH_LIFTOFF
#undef WASM_SIMD_CHECK_LANE_S #undef WASM_SIMD_CHECK_LANE_S
#undef WASM_SIMD_CHECK_LANE_U #undef WASM_SIMD_CHECK_LANE_U
#undef TO_BYTE #undef TO_BYTE
......
...@@ -75,6 +75,13 @@ using compiler::Node; ...@@ -75,6 +75,13 @@ using compiler::Node;
r.Build(code, code + arraysize(code)); \ r.Build(code, code + arraysize(code)); \
} while (false) } while (false)
// Builds the function body given by the variadic byte arguments on runner |r|
// and then calls r.CheckUsedExecutionTier(execution_tier).
// NOTE: |execution_tier| is not a macro parameter; a variable with that name
// must be in scope wherever this macro is expanded.
#define BUILD_AND_CHECK_TIER(r, ...) \
do { \
byte code[] = {__VA_ARGS__}; \
r.Build(code, code + arraysize(code)); \
r.CheckUsedExecutionTier(execution_tier); \
} while (false)
// For tests that must manually import a JSFunction with source code. // For tests that must manually import a JSFunction with source code.
struct ManuallyImportedJSFunction { struct ManuallyImportedJSFunction {
FunctionSig* sig; FunctionSig* sig;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment