Commit 3e1b6b40 authored by jing.bao's avatar jing.bao Committed by Commit Bot

[wasm-simd] [liftoff] Implement sub on x64 and ia32

Bug: v8:9909
Change-Id: If16056ebe0e8c3519afd49982561f96655a03786
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2101129Reviewed-by: 's avatarClemens Backes <clemensb@chromium.org>
Reviewed-by: 's avatarZhi An Ng <zhin@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#66758}
parent 904f12eb
......@@ -1572,6 +1572,11 @@ void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
vadd(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
}
void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "f64x2sub");
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.fp(), 0);
......@@ -1591,6 +1596,11 @@ void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
liftoff::GetSimd128Register(rhs.low_fp()));
}
void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "f32x4sub");
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Simd128Register dst_simd = liftoff::GetSimd128Register(dst.low_fp());
......@@ -1615,6 +1625,11 @@ void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
liftoff::GetSimd128Register(rhs.low_fp()));
}
void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2sub");
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon32, liftoff::GetSimd128Register(dst.low_fp()), src.gp());
......@@ -1634,6 +1649,11 @@ void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
liftoff::GetSimd128Register(rhs.low_fp()));
}
void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i32x4sub");
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon16, liftoff::GetSimd128Register(dst.low_fp()), src.gp());
......@@ -1646,6 +1666,11 @@ void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
liftoff::GetSimd128Register(rhs.low_fp()));
}
void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i16x8sub");
}
void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
......@@ -1686,6 +1711,11 @@ void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
liftoff::GetSimd128Register(rhs.low_fp()));
}
void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i8x16sub");
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
ldr(limit_address, MemOperand(limit_address));
cmp(sp, limit_address);
......
......@@ -1100,6 +1100,11 @@ void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
Fadd(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "f64x2sub");
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V4S(), src.fp().S(), 0);
......@@ -1116,6 +1121,11 @@ void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
Fadd(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "f32x4sub");
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V2D(), src.gp().X());
......@@ -1132,6 +1142,11 @@ void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
Add(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2sub");
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V4S(), src.gp().W());
......@@ -1148,6 +1163,11 @@ void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
Add(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i32x4sub");
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V8H(), src.gp().W());
......@@ -1170,6 +1190,11 @@ void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
Add(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i16x8sub");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V16B(), src.gp().W());
......@@ -1192,6 +1217,11 @@ void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
Add(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i8x16sub");
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
Ldr(limit_address, MemOperand(limit_address));
Cmp(sp, limit_address);
......
......@@ -1940,6 +1940,25 @@ void EmitSimdCommutativeBinOp(LiftoffAssembler* assm, LiftoffRegister dst,
(assm->*sse_op)(dst.fp(), rhs.fp());
}
}
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
void EmitSimdSub(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister rhs) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
} else if (lhs.fp() == rhs.fp()) {
assm->pxor(dst.fp(), dst.fp());
} else if (dst.fp() == rhs.fp()) {
assm->movaps(kScratchDoubleReg, rhs.fp());
assm->movaps(dst.fp(), lhs.fp());
(assm->*sse_op)(dst.fp(), kScratchDoubleReg);
} else {
if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
(assm->*sse_op)(dst.fp(), rhs.fp());
}
}
} // namespace liftoff
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
......@@ -1965,6 +1984,12 @@ void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vsubpd, &Assembler::subpd>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
......@@ -1996,6 +2021,12 @@ void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vsubps, &Assembler::subps>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Pinsrd(dst.fp(), src.low_gp(), 0);
......@@ -2016,6 +2047,12 @@ void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vpsubq, &Assembler::psubq>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......@@ -2034,6 +2071,12 @@ void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vpsubd, &Assembler::psubd>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......@@ -2060,6 +2103,12 @@ void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vpsubw, &Assembler::psubw>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......@@ -2086,6 +2135,12 @@ void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vpsubb, &Assembler::psubb>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
cmp(esp, Operand(limit_address, 0));
j(below_equal, ool_code);
......
......@@ -696,21 +696,29 @@ class LiftoffAssembler : public TurboAssembler {
uint8_t imm_lane_idx);
inline void emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
uint8_t imm_lane_idx);
inline void emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i64x2_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
uint8_t imm_lane_idx);
inline void emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
uint8_t imm_lane_idx);
inline void emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i16x8_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
......@@ -720,6 +728,8 @@ class LiftoffAssembler : public TurboAssembler {
uint8_t imm_lane_idx);
inline void emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i8x16_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
......@@ -729,6 +739,8 @@ class LiftoffAssembler : public TurboAssembler {
uint8_t imm_lane_idx);
inline void emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void StackCheck(Label* ool_code, Register limit_address);
......
......@@ -2271,6 +2271,12 @@ class LiftoffCompiler {
__ emit_f64x2_add(dst, lhs, rhs);
});
break;
case wasm::kExprF64x2Sub:
EmitBinOp<ValueType::kS128, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
__ emit_f64x2_sub(dst, lhs, rhs);
});
break;
case wasm::kExprF32x4Splat:
EmitUnOp<ValueType::kF32, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
......@@ -2283,6 +2289,12 @@ class LiftoffCompiler {
__ emit_f32x4_add(dst, lhs, rhs);
});
break;
case wasm::kExprF32x4Sub:
EmitBinOp<ValueType::kS128, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
__ emit_f32x4_sub(dst, lhs, rhs);
});
break;
case wasm::kExprI64x2Splat:
EmitUnOp<ValueType::kI64, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
......@@ -2295,6 +2307,12 @@ class LiftoffCompiler {
__ emit_i64x2_add(dst, lhs, rhs);
});
break;
case wasm::kExprI64x2Sub:
EmitBinOp<ValueType::kS128, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
__ emit_i64x2_sub(dst, lhs, rhs);
});
break;
case wasm::kExprI32x4Splat:
EmitUnOp<ValueType::kI32, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
......@@ -2307,6 +2325,12 @@ class LiftoffCompiler {
__ emit_i32x4_add(dst, lhs, rhs);
});
break;
case wasm::kExprI32x4Sub:
EmitBinOp<ValueType::kS128, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
__ emit_i32x4_sub(dst, lhs, rhs);
});
break;
case wasm::kExprI16x8Splat:
EmitUnOp<ValueType::kI32, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
......@@ -2319,6 +2343,12 @@ class LiftoffCompiler {
__ emit_i16x8_add(dst, lhs, rhs);
});
break;
case wasm::kExprI16x8Sub:
EmitBinOp<ValueType::kS128, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
__ emit_i16x8_sub(dst, lhs, rhs);
});
break;
case wasm::kExprI8x16Splat:
EmitUnOp<ValueType::kI32, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister src) {
......@@ -2331,6 +2361,12 @@ class LiftoffCompiler {
__ emit_i8x16_add(dst, lhs, rhs);
});
break;
case wasm::kExprI8x16Sub:
EmitBinOp<ValueType::kS128, ValueType::kS128>(
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) {
__ emit_i8x16_sub(dst, lhs, rhs);
});
break;
default:
unsupported(decoder, kSimd, "simd");
}
......
......@@ -1890,6 +1890,25 @@ void EmitSimdCommutativeBinOp(LiftoffAssembler* assm, LiftoffRegister dst,
(assm->*sse_op)(dst.fp(), rhs.fp());
}
}
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
void EmitSimdSub(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister rhs) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
} else if (lhs.fp() == rhs.fp()) {
assm->pxor(dst.fp(), dst.fp());
} else if (dst.fp() == rhs.fp()) {
assm->movaps(kScratchDoubleReg, rhs.fp());
assm->movaps(dst.fp(), lhs.fp());
(assm->*sse_op)(dst.fp(), kScratchDoubleReg);
} else {
if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
(assm->*sse_op)(dst.fp(), rhs.fp());
}
}
} // namespace liftoff
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
......@@ -1910,6 +1929,12 @@ void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vsubpd, &Assembler::subpd>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() != src.fp()) {
......@@ -1936,6 +1961,12 @@ void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vsubps, &Assembler::subps>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movq(dst.fp(), src.gp());
......@@ -1954,6 +1985,12 @@ void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vpsubq, &Assembler::psubq>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......@@ -1972,6 +2009,12 @@ void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vpsubd, &Assembler::psubd>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......@@ -1998,6 +2041,12 @@ void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vpsubw, &Assembler::psubw>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......@@ -2024,6 +2073,12 @@ void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdSub<&Assembler::vpsubb, &Assembler::psubb>(this, dst, lhs,
rhs);
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
cmpq(rsp, Operand(limit_address, 0));
j(below_equal, ool_code);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment