Commit fd735e84 authored by Kong, Fanchen; committed by Commit Bot

[wasm-simd][liftoff] Implement add for i32x4, i16x8, f32x4 on X64 and IA32

Bug: v8:9909
Change-Id: I8dc5d0143d90ecad6766c686af2d3f0f8ea89c16
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2067631
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#66603}
parent 2789be8f
......@@ -1563,6 +1563,11 @@ void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.fp(), 0);
}
// f32x4 add is not implemented in this backend: no instructions are emitted
// and the operand registers are left untouched. The request is handed to
// bailout() with reason kSimd and the detail string "f32x4add".
// NOTE(review): presumably bailout() makes Liftoff give up on the function so
// that another tier compiles it -- confirm against bailout().
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst,
                                      LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "f32x4add");
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2splat");
......@@ -1573,11 +1578,21 @@ void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.gp());
}
// i32x4 add is not implemented in this backend: no instructions are emitted
// and the operand registers are left untouched. The request is handed to
// bailout() with reason kSimd and the detail string "i32x4add".
// NOTE(review): presumably bailout() makes Liftoff give up on the function so
// that another tier compiles it -- confirm against bailout().
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst,
                                      LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "i32x4add");
}
// Emits a single vdup with element size Neon16. The source is the
// general-purpose register of |src|; the destination is the Simd128 register
// that liftoff::GetSimd128Register() derives from dst.low_fp().
// NOTE(review): presumably this replicates the low 16 bits of |src| into all
// eight lanes of |dst| -- confirm against the vdup implementation.
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  vdup(Neon16,
       liftoff::GetSimd128Register(dst.low_fp()),
       src.gp());
}
// i16x8 add is not implemented in this backend: no instructions are emitted
// and the operand registers are left untouched. The request is handed to
// bailout() with reason kSimd and the detail string "i16x8add".
// NOTE(review): presumably bailout() makes Liftoff give up on the function so
// that another tier compiles it -- confirm against bailout().
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst,
                                      LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "i16x8add");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i8x16splat");
......
......@@ -1094,6 +1094,11 @@ void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
Dup(dst.fp().V4S(), src.fp().S(), 0);
}
// f32x4 add is not implemented in this backend: no instructions are emitted
// and the operand registers are left untouched. The request is handed to
// bailout() with reason kSimd and the detail string "f32x4add".
// NOTE(review): presumably bailout() makes Liftoff give up on the function so
// that another tier compiles it -- confirm against bailout().
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst,
                                      LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "f32x4add");
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2splat");
......@@ -1104,11 +1109,21 @@ void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
Dup(dst.fp().V4S(), src.gp().W());
}
// i32x4 add is not implemented in this backend: no instructions are emitted
// and the operand registers are left untouched. The request is handed to
// bailout() with reason kSimd and the detail string "i32x4add".
// NOTE(review): presumably bailout() makes Liftoff give up on the function so
// that another tier compiles it -- confirm against bailout().
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst,
                                      LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "i32x4add");
}
// Emits a single Dup whose destination is the V8H view of |dst|'s FP register
// and whose source is the W view of |src|'s general-purpose register.
// NOTE(review): presumably this replicates the low 16 bits of |src| into all
// eight halfword lanes of |dst| -- confirm against the Dup implementation.
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V8H(),
      src.gp().W());
}
// i16x8 add is not implemented in this backend: no instructions are emitted
// and the operand registers are left untouched. The request is handed to
// bailout() with reason kSimd and the detail string "i16x8add".
// NOTE(review): presumably bailout() makes Liftoff give up on the function so
// that another tier compiles it -- confirm against bailout().
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst,
                                      LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "i16x8add");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i8x16splat");
......
......@@ -1943,6 +1943,19 @@ void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
}
}
// Emits code that adds the FP registers of |lhs| and |rhs| as f32x4 values
// and leaves the result in the FP register of |dst|.
//
// Three code shapes are produced, chosen by CPU support and register
// aliasing:
//  - AVX available: one three-operand vaddps; no operand is overwritten.
//  - No AVX, dst aliases rhs: addps accumulates lhs into dst in place.
//  - No AVX otherwise: lhs is copied into dst (unless they already alias)
//    and rhs is then accumulated into dst.
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddps(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  // addps overwrites its first operand. When dst already holds rhs, add lhs
  // into it instead of copying lhs over it (which would destroy rhs).
  if (dst.fp() == rhs.fp()) {
    addps(dst.fp(), lhs.fp());
    return;
  }

  if (dst.fp() != lhs.fp()) {
    movaps(dst.fp(), lhs.fp());
  }
  addps(dst.fp(), rhs.fp());
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Pinsrd(dst.fp(), src.low_gp(), 0);
......@@ -1956,6 +1969,19 @@ void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
Pshufd(dst.fp(), dst.fp(), 0);
}
// Emits code that adds the FP registers of |lhs| and |rhs| as i32x4 values
// and leaves the result in the FP register of |dst|.
//
// Three code shapes are produced, chosen by CPU support and register
// aliasing:
//  - AVX available: one three-operand vpaddd; no operand is overwritten.
//  - No AVX, dst aliases rhs: paddd accumulates lhs into dst in place.
//  - No AVX otherwise: lhs is copied into dst (unless they already alias)
//    and rhs is then accumulated into dst.
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpaddd(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  // paddd overwrites its first operand. When dst already holds rhs, add lhs
  // into it instead of copying lhs over it (which would destroy rhs).
  if (dst.fp() == rhs.fp()) {
    paddd(dst.fp(), lhs.fp());
    return;
  }

  // Use movaps rather than movapd for the register copy: both move the full
  // 128 bits, but movaps has no 0x66 operand-size prefix and so encodes one
  // byte shorter. This also matches the copy used by emit_f32x4_add.
  if (dst.fp() != lhs.fp()) {
    movaps(dst.fp(), lhs.fp());
  }
  paddd(dst.fp(), rhs.fp());
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......@@ -1963,6 +1989,19 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
Pshufd(dst.fp(), dst.fp(), 0);
}
// Emits code that adds the FP registers of |lhs| and |rhs| as i16x8 values
// and leaves the result in the FP register of |dst|.
//
// Three code shapes are produced, chosen by CPU support and register
// aliasing:
//  - AVX available: one three-operand vpaddw; no operand is overwritten.
//  - No AVX, dst aliases rhs: paddw accumulates lhs into dst in place.
//  - No AVX otherwise: lhs is copied into dst (unless they already alias)
//    and rhs is then accumulated into dst.
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpaddw(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  // paddw overwrites its first operand. When dst already holds rhs, add lhs
  // into it instead of copying lhs over it (which would destroy rhs).
  if (dst.fp() == rhs.fp()) {
    paddw(dst.fp(), lhs.fp());
    return;
  }

  // Use movaps rather than movapd for the register copy: both move the full
  // 128 bits, but movaps has no 0x66 operand-size prefix and so encodes one
  // byte shorter. This also matches the copy used by emit_f32x4_add.
  if (dst.fp() != lhs.fp()) {
    movaps(dst.fp(), lhs.fp());
  }
  paddw(dst.fp(), rhs.fp());
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......
......@@ -693,9 +693,15 @@ class LiftoffAssembler : public TurboAssembler {
// SIMD emitters. Every function takes the destination register first,
// followed by its source operand(s). Only the declarations live here; the
// definitions are supplied separately for each target architecture.
inline void emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src);
// Emits code for an f32x4 add: dst = lhs + rhs.
inline void emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i64x2_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src);
// Emits code for an i32x4 add: dst = lhs + rhs.
inline void emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src);
// Emits code for an i16x8 add: dst = lhs + rhs.
inline void emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
inline void StackCheck(Label* ool_code, Register limit_address);
......
......@@ -2251,6 +2251,12 @@ class LiftoffCompiler {
__ emit_f32x4_splat(dst, src);
});
break;
case wasm::kExprF32x4Add:
EmitBinOp<kWasmS128, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
__ emit_f32x4_add(dst, lhs, rhs);
});
break;
case wasm::kExprI64x2Splat:
EmitUnOp<kWasmI64, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
......@@ -2263,12 +2269,24 @@ class LiftoffCompiler {
__ emit_i32x4_splat(dst, src);
});
break;
case wasm::kExprI32x4Add:
EmitBinOp<kWasmS128, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
__ emit_i32x4_add(dst, lhs, rhs);
});
break;
case wasm::kExprI16x8Splat:
EmitUnOp<kWasmI32, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
__ emit_i16x8_splat(dst, src);
});
break;
case wasm::kExprI16x8Add:
EmitBinOp<kWasmS128, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
__ emit_i16x8_add(dst, lhs, rhs);
});
break;
case wasm::kExprI8x16Splat:
EmitUnOp<kWasmI32, kWasmS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
......
......@@ -1880,6 +1880,19 @@ void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
Shufps(dst.fp(), src.fp(), static_cast<byte>(0));
}
// Emits code that adds the FP registers of |lhs| and |rhs| as f32x4 values
// and leaves the result in the FP register of |dst|.
//
// Three code shapes are produced, chosen by CPU support and register
// aliasing:
//  - AVX available: one three-operand vaddps; no operand is overwritten.
//  - No AVX, dst aliases rhs: addps accumulates lhs into dst in place.
//  - No AVX otherwise: lhs is copied into dst (unless they already alias)
//    and rhs is then accumulated into dst.
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddps(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  // addps overwrites its first operand. When dst already holds rhs, add lhs
  // into it instead of copying lhs over it (which would destroy rhs).
  if (dst.fp() == rhs.fp()) {
    addps(dst.fp(), lhs.fp());
    return;
  }

  if (dst.fp() != lhs.fp()) {
    movaps(dst.fp(), lhs.fp());
  }
  addps(dst.fp(), rhs.fp());
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movq(dst.fp(), src.gp());
......@@ -1892,6 +1905,19 @@ void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
}
// Emits code that adds the FP registers of |lhs| and |rhs| as i32x4 values
// and leaves the result in the FP register of |dst|.
//
// Three code shapes are produced, chosen by CPU support and register
// aliasing:
//  - AVX available: one three-operand vpaddd; no operand is overwritten.
//  - No AVX, dst aliases rhs: paddd accumulates lhs into dst in place.
//  - No AVX otherwise: lhs is copied into dst (unless they already alias)
//    and rhs is then accumulated into dst.
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpaddd(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  // paddd overwrites its first operand. When dst already holds rhs, add lhs
  // into it instead of copying lhs over it (which would destroy rhs).
  if (dst.fp() == rhs.fp()) {
    paddd(dst.fp(), lhs.fp());
    return;
  }

  // Use movaps rather than movapd for the register copy: both move the full
  // 128 bits, but movaps has no 0x66 operand-size prefix and so encodes one
  // byte shorter. This also matches the copy used by emit_f32x4_add.
  if (dst.fp() != lhs.fp()) {
    movaps(dst.fp(), lhs.fp());
  }
  paddd(dst.fp(), rhs.fp());
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......@@ -1899,6 +1925,19 @@ void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
}
// Emits code that adds the FP registers of |lhs| and |rhs| as i16x8 values
// and leaves the result in the FP register of |dst|.
//
// Three code shapes are produced, chosen by CPU support and register
// aliasing:
//  - AVX available: one three-operand vpaddw; no operand is overwritten.
//  - No AVX, dst aliases rhs: paddw accumulates lhs into dst in place.
//  - No AVX otherwise: lhs is copied into dst (unless they already alias)
//    and rhs is then accumulated into dst.
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpaddw(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  // paddw overwrites its first operand. When dst already holds rhs, add lhs
  // into it instead of copying lhs over it (which would destroy rhs).
  if (dst.fp() == rhs.fp()) {
    paddw(dst.fp(), lhs.fp());
    return;
  }

  // Use movaps rather than movapd for the register copy: both move the full
  // 128 bits, but movaps has no 0x66 operand-size prefix and so encodes one
  // byte shorter. This also matches the copy used by emit_f32x4_add.
  if (dst.fp() != lhs.fp()) {
    movaps(dst.fp(), lhs.fp());
  }
  paddw(dst.fp(), rhs.fp());
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment