Commit 485e66ba authored by Kong, Fanchen, committed by Commit Bot

[wasm-simd] [liftoff] Implement add for f64x2, i64x2, i8x16 on X64 and IA32

Bug: v8:9909
Change-Id: I6766c1d0f347f8e0c8dea588e5984eb48ad18d5c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2094198
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#66653}
parent f63189d6
@@ -1559,6 +1559,11 @@ void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
  TurboAssembler::Move(dst.high_fp(), src.fp());
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "f64x2add");
}

void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.fp(), 0);
@@ -1579,6 +1584,11 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
  ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 3);
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "i64x2add");
}

void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.gp());
@@ -1608,6 +1618,11 @@ void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
  vdup(Neon8, liftoff::GetSimd128Register(dst.low_fp()), src.gp());
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "i8x16add");
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  ldr(limit_address, MemOperand(limit_address));
  cmp(sp, limit_address);
......
@@ -1089,6 +1089,11 @@ void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
  Dup(dst.fp().V2D(), src.fp().D(), 0);
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "f64x2add");
}

void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V4S(), src.fp().S(), 0);
@@ -1104,6 +1109,11 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
  Dup(dst.fp().V2D(), src.gp().X());
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "i64x2add");
}

void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Dup(dst.fp().V4S(), src.gp().W());
@@ -1129,6 +1139,11 @@ void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
  Dup(dst.fp().V16B(), src.gp().W());
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  bailout(kSimd, "i8x16add");
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  Ldr(limit_address, MemOperand(limit_address));
  Cmp(sp, limit_address);
......
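On arm and arm64 this CL only adds bailouts, so functions that use these opcodes fall back to TurboFan on those architectures. Purely for orientation, here is a hedged sketch, not part of this CL, of what native arm64 lowerings could look like, reusing the NEON register views already used by the splat implementations above:

// Illustrative sketch only; this CL deliberately bails out on arm64.
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Fadd(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());  // fadd v.2d
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());  // add v.2d
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  Add(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());  // add v.16b
}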
@@ -1930,6 +1930,19 @@ void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
  Movddup(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddpd(dst.fp(), lhs.fp(), rhs.fp());
  } else if (dst.fp() == rhs.fp()) {
    addpd(dst.fp(), lhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) movapd(dst.fp(), lhs.fp());
    addpd(dst.fp(), rhs.fp());
  }
}

void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
@@ -1963,6 +1976,19 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
  Pshufd(dst.fp(), dst.fp(), 0x44);
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpaddq(dst.fp(), lhs.fp(), rhs.fp());
  } else if (dst.fp() == rhs.fp()) {
    paddq(dst.fp(), lhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    paddq(dst.fp(), rhs.fp());
  }
}

void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movd(dst.fp(), src.gp());
@@ -2009,6 +2035,19 @@ void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
  Pshufb(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpaddb(dst.fp(), lhs.fp(), rhs.fp());
  } else if (dst.fp() == rhs.fp()) {
    paddb(dst.fp(), lhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    paddb(dst.fp(), rhs.fp());
  }
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  cmp(esp, Operand(limit_address, 0));
  j(below_equal, ool_code);
......
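Each of the ia32 implementations above (and the x64 ones further below) repeats the same dispatch idiom: prefer the non-destructive three-operand AVX encoding when available, otherwise fall back to the destructive SSE form, swapping the operands when dst aliases rhs (safe only because addition is commutative) and copying lhs into dst first otherwise. A hedged sketch of how that idiom could be factored into a shared helper; the helper name and callable parameters are illustrative, not code from this CL:

// Illustrative only: the shared AVX/SSE pattern behind the new emit_*_add
// functions, written as a generic helper. avx_op takes (dst, lhs, rhs);
// sse_op takes (dst, src) and computes dst = dst op src.
template <typename AvxOp, typename SseOp>
void EmitCommutativeSimdBinOp(LiftoffAssembler* assm, DoubleRegister dst,
                              DoubleRegister lhs, DoubleRegister rhs,
                              AvxOp avx_op, SseOp sse_op) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(assm, AVX);
    avx_op(dst, lhs, rhs);                   // dst = lhs op rhs, non-destructive.
  } else if (dst == rhs) {
    sse_op(dst, lhs);                        // dst(=rhs) op= lhs; ok, op commutes.
  } else {
    if (dst != lhs) assm->movaps(dst, lhs);  // copy lhs into dst if needed,
    sse_op(dst, rhs);                        // then dst(=lhs) op= rhs.
  }
}

With such a helper, emit_i64x2_add would reduce to a single call passing lambdas that wrap vpaddq and paddq; in this CL the logic is simply inlined in each emit function, and a helper like this is just one way it could be deduplicated.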
@@ -692,10 +692,14 @@ class LiftoffAssembler : public TurboAssembler {
                                DoubleRegister lhs, DoubleRegister rhs);
  inline void emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i64x2_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
@@ -703,6 +707,8 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void StackCheck(Label* ool_code, Register limit_address);
......
@@ -2251,6 +2251,12 @@ class LiftoffCompiler {
              __ emit_f64x2_splat(dst, src);
            });
        break;
      case wasm::kExprF64x2Add:
        EmitBinOp<kWasmS128, kWasmS128>(
            [=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
              __ emit_f64x2_add(dst, lhs, rhs);
            });
        break;
      case wasm::kExprF32x4Splat:
        EmitUnOp<kWasmF32, kWasmS128>(
            [=](LiftoffRegister dst, LiftoffRegister src) {
@@ -2269,6 +2275,12 @@ class LiftoffCompiler {
              __ emit_i64x2_splat(dst, src);
            });
        break;
      case wasm::kExprI64x2Add:
        EmitBinOp<kWasmS128, kWasmS128>(
            [=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
              __ emit_i64x2_add(dst, lhs, rhs);
            });
        break;
      case wasm::kExprI32x4Splat:
        EmitUnOp<kWasmI32, kWasmS128>(
            [=](LiftoffRegister dst, LiftoffRegister src) {
@@ -2299,6 +2311,12 @@ class LiftoffCompiler {
              __ emit_i8x16_splat(dst, src);
            });
        break;
      case wasm::kExprI8x16Add:
        EmitBinOp<kWasmS128, kWasmS128>(
            [=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
              __ emit_i8x16_add(dst, lhs, rhs);
            });
        break;
      default:
        unsupported(decoder, kSimd, "simd");
    }
......
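EmitBinOp<kWasmS128, kWasmS128> is the existing LiftoffCompiler helper that wires a machine-level emitter into Liftoff's value stack; the lambdas above only supply the final instruction. As a rough, simplified sketch of what such a helper conceptually does (names and signatures here are approximations, not the actual V8 code):

// Simplified, hypothetical sketch of a Liftoff binop helper: pop both
// operands into registers, pick a destination register of the result's
// register class, call the supplied emitter, and push the result back
// onto the Liftoff value stack.
template <ValueType src_type, ValueType result_type, typename EmitFn>
void EmitBinOp(EmitFn fn) {
  LiftoffRegister rhs = __ PopToRegister();
  LiftoffRegister lhs = __ PopToRegister(LiftoffRegList::ForRegs(rhs));
  LiftoffRegister dst =
      __ GetUnusedRegister(reg_class_for(result_type), {lhs, rhs});
  fn(dst, lhs, rhs);  // e.g. the lambda calling __ emit_i64x2_add(dst, lhs, rhs).
  __ PushRegister(result_type, dst);
}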
@@ -1872,6 +1872,19 @@ void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
  Movddup(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddpd(dst.fp(), lhs.fp(), rhs.fp());
  } else if (dst.fp() == rhs.fp()) {
    addpd(dst.fp(), lhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) movapd(dst.fp(), lhs.fp());
    addpd(dst.fp(), rhs.fp());
  }
}

void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  if (dst.fp() != src.fp()) {
@@ -1899,6 +1912,19 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
  Movddup(dst.fp(), dst.fp());
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpaddq(dst.fp(), lhs.fp(), rhs.fp());
  } else if (dst.fp() == rhs.fp()) {
    paddq(dst.fp(), lhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    paddq(dst.fp(), rhs.fp());
  }
}

void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movd(dst.fp(), src.gp());
@@ -1945,6 +1971,19 @@ void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
  Pshufb(dst.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpaddb(dst.fp(), lhs.fp(), rhs.fp());
  } else if (dst.fp() == rhs.fp()) {
    paddb(dst.fp(), lhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    paddb(dst.fp(), rhs.fp());
  }
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  cmpq(rsp, Operand(limit_address, 0));
  j(below_equal, ool_code);
......