Commit 04b6e0de authored by Zhou, Zhiguo, committed by Commit Bot

[wasm-simd][liftoff] Reorder SIMD opcodes

This reorders the SIMD opcodes, together with their implementations
on x64 and ia32, to match the order in src/wasm/wasm-opcodes.h.

Bug: v8:9909
Change-Id: Ib2e75927b3b44ebc951005222c8641c256d8872c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2135074
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com>
Cr-Commit-Position: refs/heads/master@{#67051}
parent 1277b5fa
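
The resulting order groups the emitters by SIMD shape; a minimal illustrative sketch of that ordering (not the actual contents of src/wasm/wasm-opcodes.h):

// Illustrative only: within each group of operations (splat, extract_lane,
// replace_lane, arithmetic), the emitters now appear in this shape order.
enum SimdShapeOrder { kI8x16, kI16x8, kI32x4, kI64x2, kF32x4, kF64x2 };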
@@ -1968,218 +1968,225 @@ void EmitSimdSub(LiftoffAssembler* assm, LiftoffRegister dst,
}
}  // namespace liftoff

void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movd(dst.fp(), src.gp());
  Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Pshufb(dst.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movd(dst.fp(), src.gp());
  Pshuflw(dst.fp(), dst.fp(), 0);
  Pshufd(dst.fp(), dst.fp(), 0);
}

void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movd(dst.fp(), src.gp());
  Pshufd(dst.fp(), dst.fp(), 0);
}

void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Pinsrd(dst.fp(), src.low_gp(), 0);
  Pinsrd(dst.fp(), src.high_gp(), 1);
  Pshufd(dst.fp(), dst.fp(), 0x44);
}

void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vshufps(dst.fp(), src.fp(), src.fp(), 0);
  } else {
    if (dst.fp() != src.fp()) {
      movss(dst.fp(), src.fp());
    }
    shufps(dst.fp(), src.fp(), 0);
  }
}

void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Movddup(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_add_saturate_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_add_saturate_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdSub<&Assembler::vpsubb, &Assembler::psubb>(this, dst, lhs,
                                                              rhs);
}

void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  static constexpr RegClass tmp_rc = reg_class_for(ValueType::kS128);
  LiftoffRegister tmp =
      GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs));
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    // I16x8 view of I8x16
    // left = AAaa AAaa ... AAaa AAaa
    // right= BBbb BBbb ... BBbb BBbb
    // t = 00AA 00AA ... 00AA 00AA
    // s = 00BB 00BB ... 00BB 00BB
    vpsrlw(tmp.fp(), lhs.fp(), 8);
    vpsrlw(liftoff::kScratchDoubleReg, rhs.fp(), 8);
    // t = I16x8Mul(t0, t1)
    //    => __PP __PP ...  __PP  __PP
    vpmullw(tmp.fp(), tmp.fp(), liftoff::kScratchDoubleReg);
    // s = left * 256
    vpsllw(liftoff::kScratchDoubleReg, lhs.fp(), 8);
    // dst = I16x8Mul(left * 256, right)
    //    => pp__ pp__ ...  pp__  pp__
    vpmullw(dst.fp(), liftoff::kScratchDoubleReg, rhs.fp());
    // dst = I16x8Shr(dst, 8)
    //    => 00pp 00pp ...  00pp  00pp
    vpsrlw(dst.fp(), dst.fp(), 8);
    // t = I16x8Shl(t, 8)
    //    => PP00 PP00 ...  PP00  PP00
    vpsllw(tmp.fp(), tmp.fp(), 8);
    // dst = I16x8Or(dst, t)
    //    => PPpp PPpp ...  PPpp  PPpp
    vpor(dst.fp(), dst.fp(), tmp.fp());
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    // I16x8 view of I8x16
    // left = AAaa AAaa ... AAaa AAaa
    // right= BBbb BBbb ... BBbb BBbb
    // t = 00AA 00AA ... 00AA 00AA
    // s = 00BB 00BB ... 00BB 00BB
    movaps(tmp.fp(), dst.fp());
    movaps(liftoff::kScratchDoubleReg, rhs.fp());
    psrlw(tmp.fp(), 8);
    psrlw(liftoff::kScratchDoubleReg, 8);
    // dst = left * 256
    psllw(dst.fp(), 8);
    // t = I16x8Mul(t, s)
    //    => __PP __PP ...  __PP  __PP
    pmullw(tmp.fp(), liftoff::kScratchDoubleReg);
    // dst = I16x8Mul(left * 256, right)
    //    => pp__ pp__ ...  pp__  pp__
    pmullw(dst.fp(), rhs.fp());
    // t = I16x8Shl(t, 8)
    //    => PP00 PP00 ...  PP00  PP00
    psllw(tmp.fp(), 8);
    // dst = I16x8Shr(dst, 8)
    //    => 00pp 00pp ...  00pp  00pp
    psrlw(dst.fp(), 8);
    // dst = I16x8Or(dst, t)
    //    => PPpp PPpp ...  PPpp  PPpp
    por(dst.fp(), tmp.fp());
  }
}

void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add_saturate_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add_saturate_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdSub<&Assembler::vpsubw, &Assembler::psubw>(this, dst, lhs,
                                                              rhs);
}

void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
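
x86 has no 8-bit SIMD multiply, so the i8x16.mul above is synthesized from 16-bit multiplies, as the lane diagrams in its comments trace out. A scalar model of one 16-bit lane (an illustrative sketch, not V8 code):

#include <cstdint>
#include <cstdio>

// Each 16-bit lane holds two bytes; each byte is multiplied with a
// 16-bit pmullw-style multiply and the results are recombined.
uint16_t MulTwoBytes(uint16_t left, uint16_t right) {
  // t = (left >> 8) * (right >> 8): product of the high bytes (__PP).
  uint16_t t = static_cast<uint16_t>((left >> 8) * (right >> 8));
  // dst = ((left << 8) * right) >> 8: low bits of the low-byte product (00pp).
  uint16_t shifted = static_cast<uint16_t>(left << 8);  // left * 256
  uint16_t dst = static_cast<uint16_t>(shifted * right) >> 8;
  // Recombine both byte products (PPpp).
  return static_cast<uint16_t>(t << 8) | dst;
}

int main() {
  // Lane 0x0304 * 0x0506: bytes 3*5 = 0x0F and 4*6 = 0x18 -> prints 0f18.
  std::printf("%04x\n", MulTwoBytes(0x0304, 0x0506));
}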
@@ -2228,104 +2235,86 @@ void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdSub<&Assembler::vpsubq, &Assembler::psubq>(this, dst, lhs,
                                                              rhs);
}

void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  static constexpr RegClass tmp_rc = reg_class_for(ValueType::kS128);
  LiftoffRegister tmp1 =
      GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs));
  LiftoffRegister tmp2 =
      GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs, tmp1));
  Movaps(tmp1.fp(), lhs.fp());
  Movaps(tmp2.fp(), rhs.fp());
  // Multiply high dword of each qword of left with right.
  Psrlq(tmp1.fp(), 32);
  Pmuludq(tmp1.fp(), tmp1.fp(), rhs.fp());
  // Multiply high dword of each qword of right with left.
  Psrlq(tmp2.fp(), 32);
  Pmuludq(tmp2.fp(), tmp2.fp(), lhs.fp());
  Paddq(tmp2.fp(), tmp2.fp(), tmp1.fp());
  Psllq(tmp2.fp(), tmp2.fp(), 32);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpmuludq(dst.fp(), lhs.fp(), rhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    pmuludq(dst.fp(), rhs.fp());
  }
  Paddq(dst.fp(), dst.fp(), tmp2.fp());
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdSub<&Assembler::vsubps, &Assembler::subps>(this, dst, lhs,
                                                              rhs);
}

void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdSub<&Assembler::vsubpd, &Assembler::subpd>(this, dst, lhs,
                                                              rhs);
}

void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
  movsx_b(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
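
SSE2 only offers a 32x32->64 multiply (pmuludq), so the i64x2.mul above assembles each 64-bit product from three 32-bit partial products; the high-dword-by-high-dword term only affects bits 64 and up and is dropped. A scalar model (illustrative sketch, not V8 code):

#include <cstdint>
#include <cstdio>

// a*b mod 2^64 = ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo
uint64_t MulU64ViaU32(uint64_t a, uint64_t b) {
  uint64_t a_lo = a & 0xFFFFFFFFu, a_hi = a >> 32;
  uint64_t b_lo = b & 0xFFFFFFFFu, b_hi = b >> 32;
  uint64_t cross = a_hi * b_lo + a_lo * b_hi;  // carries past bit 63 wrap away
  return (cross << 32) + a_lo * b_lo;
}

int main() {
  uint64_t a = 0x123456789ABCDEFull, b = 0xFEDCBA987654321ull;
  std::printf("%d\n", MulU64ViaU32(a, b) == a * b);  // prints 1
}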
@@ -2334,11 +2323,54 @@ void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
  movsx_w(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.low_gp(), lhs.fp(), imm_lane_idx * 2);
  Pextrd(dst.high_gp(), lhs.fp(), imm_lane_idx * 2 + 1);
}

void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vshufps(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx);
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    if (imm_lane_idx != 0) shufps(dst.fp(), dst.fp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vshufpd(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx);
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    if (imm_lane_idx != 0) shufpd(dst.fp(), dst.fp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
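
On ia32 an i64 value occupies a pair of 32-bit registers, which is why emit_i64x2_extract_lane above issues two Pextrd instructions. A scalar model (illustrative sketch, not V8 code):

#include <cstdint>

// Lane k of an i64x2 spans dword indices 2*k and 2*k + 1.
void ExtractI64Lane(const uint32_t vec_dwords[4], int k,
                    uint32_t* low, uint32_t* high) {
  *low = vec_dwords[k * 2];       // Pextrd(dst.low_gp(), lhs.fp(), k * 2)
  *high = vec_dwords[k * 2 + 1];  // Pextrd(dst.high_gp(), lhs.fp(), k * 2 + 1)
}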
@@ -2355,120 +2387,88 @@ void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
  }
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.low_gp(), imm_lane_idx * 2);
    vpinsrd(dst.fp(), dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.low_gp(), imm_lane_idx * 2);
    pinsrd(dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  }
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  }
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  // TODO(fanchenk): Use movlhps and blendpd
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    if (imm_lane_idx == 0) {
      vinsertps(dst.fp(), src1.fp(), src2.fp(), 0b00000000);
      vinsertps(dst.fp(), dst.fp(), src2.fp(), 0b01010000);
    } else {
      vinsertps(dst.fp(), src1.fp(), src2.fp(), 0b00100000);
      vinsertps(dst.fp(), dst.fp(), src2.fp(), 0b01110000);
    }
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    if (imm_lane_idx == 0) {
      insertps(dst.fp(), src2.fp(), 0b00000000);
      insertps(dst.fp(), src2.fp(), 0b01010000);
    } else {
      insertps(dst.fp(), src2.fp(), 0b00100000);
      insertps(dst.fp(), src2.fp(), 0b01110000);
    }
  }
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
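
The insertps pair in emit_f64x2_replace_lane above replaces a 64-bit lane as two 32-bit copies, since insertps moves one dword per instruction. A scalar model (illustrative sketch, not V8 code):

#include <cstdint>

// Lane k of an f64x2 occupies dwords 2*k and 2*k + 1; the replacement
// double sits in the low two dwords of src2.
void ReplaceF64Lane(uint32_t dst[4], const uint32_t src2[4], int k) {
  dst[k * 2] = src2[0];      // first insertps: src dword 0 -> dst dword 2*k
  dst[k * 2 + 1] = src2[1];  // second insertps: src dword 1 -> dst dword 2*k+1
}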
...

@@ -711,84 +711,44 @@ class LiftoffAssembler : public TurboAssembler {
  inline void emit_f64_set_cond(Condition condition, Register dst,
                                DoubleRegister lhs, DoubleRegister rhs);

  inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_i64x2_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src);
  inline void emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i8x16_add_saturate_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs);
  inline void emit_i8x16_add_saturate_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs);
  inline void emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i8x16_min_s(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i8x16_min_u(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i8x16_max_s(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i8x16_max_u(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i16x8_add_saturate_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs);
  inline void emit_i16x8_add_saturate_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs);
  inline void emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i16x8_min_s(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i16x8_min_u(LiftoffRegister dst, LiftoffRegister lhs,
@@ -797,36 +757,76 @@ class LiftoffAssembler : public TurboAssembler {
                               LiftoffRegister rhs);
  inline void emit_i16x8_max_u(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i32x4_min_s(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i32x4_min_u(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i32x4_max_s(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i32x4_max_u(LiftoffRegister dst, LiftoffRegister lhs,
                               LiftoffRegister rhs);
  inline void emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
  inline void emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        uint8_t imm_lane_idx);
  inline void emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        uint8_t imm_lane_idx);
  inline void emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        uint8_t imm_lane_idx);
  inline void emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        uint8_t imm_lane_idx);
  inline void emit_i32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
                                      uint8_t imm_lane_idx);
  inline void emit_i64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
                                      uint8_t imm_lane_idx);
  inline void emit_f32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
                                      uint8_t imm_lane_idx);
  inline void emit_f64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs,
                                      uint8_t imm_lane_idx);
  inline void emit_i8x16_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_i16x8_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_i32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_i64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_f32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);
  inline void emit_f64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1,
                                      LiftoffRegister src2,
                                      uint8_t imm_lane_idx);

  inline void StackCheck(Label* ool_code, Register limit_address);
...

@@ -2294,82 +2294,30 @@ class LiftoffCompiler {
      return unsupported(decoder, kSimd, "simd");
    }
    switch (opcode) {
      case wasm::kExprI8x16Splat:
        return EmitUnOp<kI32, kS128>(&LiftoffAssembler::emit_i8x16_splat);
      case wasm::kExprI16x8Splat:
        return EmitUnOp<kI32, kS128>(&LiftoffAssembler::emit_i16x8_splat);
      case wasm::kExprI32x4Splat:
        return EmitUnOp<kI32, kS128>(&LiftoffAssembler::emit_i32x4_splat);
      case wasm::kExprI64x2Splat:
        return EmitUnOp<kI64, kS128>(&LiftoffAssembler::emit_i64x2_splat);
      case wasm::kExprF32x4Splat:
        return EmitUnOp<kF32, kS128>(&LiftoffAssembler::emit_f32x4_splat);
      case wasm::kExprF64x2Splat:
        return EmitUnOp<kF64, kS128>(&LiftoffAssembler::emit_f64x2_splat);
      case wasm::kExprI8x16Add:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_add);
      case wasm::kExprI8x16AddSaturateS:
        return EmitBinOp<kS128, kS128>(
            &LiftoffAssembler::emit_i8x16_add_saturate_s);
      case wasm::kExprI8x16AddSaturateU:
        return EmitBinOp<kS128, kS128>(
            &LiftoffAssembler::emit_i8x16_add_saturate_u);
      case wasm::kExprI8x16Sub:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_sub);
      case wasm::kExprI8x16Mul:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_mul);
      case wasm::kExprI8x16MinS:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_min_s);
      case wasm::kExprI8x16MinU:
@@ -2378,6 +2326,58 @@ class LiftoffCompiler {
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_max_s);
      case wasm::kExprI8x16MaxU:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_max_u);
      case wasm::kExprI16x8Add:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_add);
      case wasm::kExprI16x8AddSaturateS:
        return EmitBinOp<kS128, kS128>(
            &LiftoffAssembler::emit_i16x8_add_saturate_s);
      case wasm::kExprI16x8AddSaturateU:
        return EmitBinOp<kS128, kS128>(
            &LiftoffAssembler::emit_i16x8_add_saturate_u);
      case wasm::kExprI16x8Sub:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_sub);
      case wasm::kExprI16x8Mul:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_mul);
      case wasm::kExprI16x8MinS:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_min_s);
      case wasm::kExprI16x8MinU:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_min_u);
      case wasm::kExprI16x8MaxS:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_max_s);
      case wasm::kExprI16x8MaxU:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_max_u);
      case wasm::kExprI32x4Add:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_add);
      case wasm::kExprI32x4Sub:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_sub);
      case wasm::kExprI32x4Mul:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_mul);
      case wasm::kExprI32x4MinS:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_min_s);
      case wasm::kExprI32x4MinU:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_min_u);
      case wasm::kExprI32x4MaxS:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_max_s);
      case wasm::kExprI32x4MaxU:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_max_u);
      case wasm::kExprI64x2Add:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_add);
      case wasm::kExprI64x2Sub:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_sub);
      case wasm::kExprI64x2Mul:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_mul);
      case wasm::kExprF32x4Add:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_add);
      case wasm::kExprF32x4Sub:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_sub);
      case wasm::kExprF32x4Mul:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_mul);
      case wasm::kExprF64x2Add:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_add);
      case wasm::kExprF64x2Sub:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_sub);
      case wasm::kExprF64x2Mul:
        return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f64x2_mul);
      default:
        unsupported(decoder, kSimd, "simd");
    }
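
Each case simply forwards to the matching assembler emitter through a member-function pointer; a hypothetical mini-version of the pattern (not the actual EmitBinOp from liftoff-compiler.cc, which also pops the inputs from Liftoff's value stack and picks a destination register):

template <typename AssemblerT, typename RegT>
void EmitBinOpSketch(AssemblerT* assembler,
                     void (AssemblerT::*emit_fn)(RegT, RegT, RegT),
                     RegT dst, RegT lhs, RegT rhs) {
  // e.g. emit_fn = &LiftoffAssembler::emit_i8x16_add
  (assembler->*emit_fn)(dst, lhs, rhs);
}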
@@ -2438,14 +2438,14 @@ class LiftoffCompiler {
        },                                                                   \
        imm);                                                                \
    break;
      CASE_SIMD_EXTRACT_LANE_OP(I8x16ExtractLaneS, I32, i8x16_extract_lane_s)
      CASE_SIMD_EXTRACT_LANE_OP(I8x16ExtractLaneU, I32, i8x16_extract_lane_u)
      CASE_SIMD_EXTRACT_LANE_OP(I16x8ExtractLaneS, I32, i16x8_extract_lane_s)
      CASE_SIMD_EXTRACT_LANE_OP(I16x8ExtractLaneU, I32, i16x8_extract_lane_u)
      CASE_SIMD_EXTRACT_LANE_OP(I32x4ExtractLane, I32, i32x4_extract_lane)
      CASE_SIMD_EXTRACT_LANE_OP(I64x2ExtractLane, I64, i64x2_extract_lane)
      CASE_SIMD_EXTRACT_LANE_OP(F32x4ExtractLane, F32, f32x4_extract_lane)
      CASE_SIMD_EXTRACT_LANE_OP(F64x2ExtractLane, F64, f64x2_extract_lane)
#undef CASE_SIMD_EXTRACT_LANE_OP
#define CASE_SIMD_REPLACE_LANE_OP(opcode, type, fn)                          \
  case wasm::kExpr##opcode:                                                  \
@@ -2456,12 +2456,12 @@ class LiftoffCompiler {
        },                                                                   \
        imm);                                                                \
    break;
      CASE_SIMD_REPLACE_LANE_OP(I8x16ReplaceLane, I32, i8x16_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(I16x8ReplaceLane, I32, i16x8_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(I32x4ReplaceLane, I32, i32x4_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(I64x2ReplaceLane, I64, i64x2_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(F32x4ReplaceLane, F32, f32x4_replace_lane)
      CASE_SIMD_REPLACE_LANE_OP(F64x2ReplaceLane, F64, f64x2_replace_lane)
#undef CASE_SIMD_REPLACE_LANE_OP
      default:
        unsupported(decoder, kSimd, "simd");

...
...@@ -1918,199 +1918,219 @@ void EmitSimdSub(LiftoffAssembler* assm, LiftoffRegister dst, ...@@ -1918,199 +1918,219 @@ void EmitSimdSub(LiftoffAssembler* assm, LiftoffRegister dst,
} }
} // namespace liftoff } // namespace liftoff
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Movddup(dst.fp(), src.fp()); Movd(dst.fp(), src.gp());
Pxor(kScratchDoubleReg, kScratchDoubleReg);
Pshufb(dst.fp(), kScratchDoubleReg);
} }
void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst, void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister src) {
uint8_t imm_lane_idx) { Movd(dst.fp(), src.gp());
Pextrq(kScratchRegister, lhs.fp(), static_cast<int8_t>(imm_lane_idx)); Pshuflw(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
Movq(dst.fp(), kScratchRegister); Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
} }
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src1, LiftoffRegister src) {
LiftoffRegister src2, Movd(dst.fp(), src.gp());
uint8_t imm_lane_idx) { Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
if (CpuFeatures::IsSupported(AVX)) { }
CpuFeatureScope scope(this, AVX);
if (imm_lane_idx == 0) { void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
vpblendw(dst.fp(), src1.fp(), src2.fp(), 0b00001111); LiftoffRegister src) {
} else { Movq(dst.fp(), src.gp());
vmovlhps(dst.fp(), src1.fp(), src2.fp()); Movddup(dst.fp(), dst.fp());
} }
} else {
CpuFeatureScope scope(this, SSE4_1); void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp()); LiftoffRegister src) {
if (imm_lane_idx == 0) { if (dst.fp() != src.fp()) {
pblendw(dst.fp(), src2.fp(), 0b00001111); Movss(dst.fp(), src.fp());
} else {
movlhps(dst.fp(), src2.fp());
}
} }
Shufps(dst.fp(), src.fp(), static_cast<byte>(0));
} }
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movddup(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) { LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>( liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
this, dst, lhs, rhs); this, dst, lhs, rhs);
} }
void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_i8x16_add_saturate_s(LiftoffRegister dst,
LiftoffRegister rhs) { LiftoffRegister lhs,
liftoff::EmitSimdSub<&Assembler::vsubpd, &Assembler::subpd>(this, dst, lhs, LiftoffRegister rhs) {
rhs); liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
this, dst, lhs, rhs);
} }
void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_i8x16_add_saturate_u(LiftoffRegister dst,
LiftoffRegister rhs) { LiftoffRegister lhs,
liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>( LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
this, dst, lhs, rhs); this, dst, lhs, rhs);
} }
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister src) { LiftoffRegister rhs) {
if (dst.fp() != src.fp()) { liftoff::EmitSimdSub<&Assembler::vpsubb, &Assembler::psubb>(this, dst, lhs,
Movss(dst.fp(), src.fp()); rhs);
}
Shufps(dst.fp(), src.fp(), static_cast<byte>(0));
} }
void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister lhs, LiftoffRegister rhs) {
uint8_t imm_lane_idx) { static constexpr RegClass tmp_rc = reg_class_for(ValueType::kS128);
LiftoffRegister tmp =
GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs));
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
vshufps(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx); // I16x8 view of I8x16
// left = AAaa AAaa ... AAaa AAaa
// right= BBbb BBbb ... BBbb BBbb
// t = 00AA 00AA ... 00AA 00AA
// s = 00BB 00BB ... 00BB 00BB
vpsrlw(tmp.fp(), lhs.fp(), 8);
vpsrlw(kScratchDoubleReg, rhs.fp(), 8);
// t = I16x8Mul(t0, t1)
//    => __PP __PP ...  __PP  __PP
vpmullw(tmp.fp(), tmp.fp(), kScratchDoubleReg);
// s = left * 256
vpsllw(kScratchDoubleReg, lhs.fp(), 8);
// dst = I16x8Mul(left * 256, right)
//    => pp__ pp__ ...  pp__  pp__
vpmullw(dst.fp(), kScratchDoubleReg, rhs.fp());
// dst = I16x8Shr(dst, 8)
//    => 00pp 00pp ...  00pp  00pp
vpsrlw(dst.fp(), dst.fp(), 8);
// t = I16x8Shl(t, 8)
//    => PP00 PP00 ...  PP00  PP00
vpsllw(tmp.fp(), tmp.fp(), 8);
// dst = I16x8Or(dst, t)
//    => PPpp PPpp ...  PPpp  PPpp
vpor(dst.fp(), dst.fp(), tmp.fp());
} else { } else {
if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp()); if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
if (imm_lane_idx != 0) shufps(dst.fp(), dst.fp(), imm_lane_idx); // I16x8 view of I8x16
// left = AAaa AAaa ... AAaa AAaa
// right= BBbb BBbb ... BBbb BBbb
// t = 00AA 00AA ... 00AA 00AA
// s = 00BB 00BB ... 00BB 00BB
movaps(tmp.fp(), dst.fp());
movaps(kScratchDoubleReg, rhs.fp());
psrlw(tmp.fp(), 8);
psrlw(kScratchDoubleReg, 8);
// dst = left * 256
psllw(dst.fp(), 8);
// t = I16x8Mul(t, s)
//    => __PP __PP ...  __PP  __PP
pmullw(tmp.fp(), kScratchDoubleReg);
// dst = I16x8Mul(left * 256, right)
//    => pp__ pp__ ...  pp__  pp__
pmullw(dst.fp(), rhs.fp());
// t = I16x8Shl(t, 8)
//    => PP00 PP00 ...  PP00  PP00
psllw(tmp.fp(), 8);
// dst = I16x8Shr(dst, 8)
//    => 00pp 00pp ...  00pp  00pp
psrlw(dst.fp(), 8);
// dst = I16x8Or(dst, t)
//    => PPpp PPpp ...  PPpp  PPpp
por(dst.fp(), tmp.fp());
} }
} }
void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
      this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add_saturate_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_add_saturate_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdSub<&Assembler::vpsubw, &Assembler::psubw>(this, dst, lhs,
                                                              rhs);
}

void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}

void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2159,117 +2179,136 @@ void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
      this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdSub<&Assembler::vpsubq, &Assembler::psubq>(this, dst, lhs,
                                                              rhs);
}

void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  static constexpr RegClass tmp_rc = reg_class_for(ValueType::kS128);
  LiftoffRegister tmp1 =
      GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs));
  LiftoffRegister tmp2 =
      GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs, tmp1));
  Movaps(tmp1.fp(), lhs.fp());
  Movaps(tmp2.fp(), rhs.fp());
  // Multiply high dword of each qword of left with right.
  Psrlq(tmp1.fp(), 32);
  Pmuludq(tmp1.fp(), rhs.fp());
  // Multiply high dword of each qword of right with left.
  Psrlq(tmp2.fp(), 32);
  Pmuludq(tmp2.fp(), lhs.fp());
  Paddq(tmp2.fp(), tmp1.fp());
  Psllq(tmp2.fp(), 32);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpmuludq(dst.fp(), lhs.fp(), rhs.fp());
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    pmuludq(dst.fp(), rhs.fp());
  }
  Paddq(dst.fp(), tmp2.fp());
}
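pmuludq multiplies only the low 32 bits of each 64-bit lane, so the sequence above assembles the low 64 bits of a 64x64 product from three 32x32->64 partial products; the hi*hi term affects only bits >= 64 and is dropped. A scalar model of one lane (illustrative C++ only, not part of this change):

#include <cstdint>

// Models one lane of i64x2_mul: lo*lo plus the two cross products shifted
// into the high half, all modulo 2^64.
uint64_t MulI64x2Lane(uint64_t left, uint64_t right) {
  uint64_t left_hi = left >> 32;              // Psrlq(tmp1, 32)
  uint64_t right_hi = right >> 32;            // Psrlq(tmp2, 32)
  uint64_t left_lo = left & 0xFFFFFFFFu;
  uint64_t right_lo = right & 0xFFFFFFFFu;
  // tmp2 = (hi(left)*lo(right) + hi(right)*lo(left)) << 32
  uint64_t cross = (left_hi * right_lo + right_hi * left_lo) << 32;
  // dst = lo(left)*lo(right) + cross  (pmuludq + Paddq)
  return left_lo * right_lo + cross;
}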
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdSub<&Assembler::vsubps, &Assembler::subps>(this, dst, lhs,
                                                              rhs);
}

void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdSub<&Assembler::vsubpd, &Assembler::subpd>(this, dst, lhs,
                                                              rhs);
}

void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
  movsxbl(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
  movsxwl(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrq(dst.gp(), lhs.fp(), static_cast<int8_t>(imm_lane_idx));
}

void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vshufps(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx);
  } else {
    if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
    if (imm_lane_idx != 0) shufps(dst.fp(), dst.fp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrq(kScratchRegister, lhs.fp(), static_cast<int8_t>(imm_lane_idx));
  Movq(dst.fp(), kScratchRegister);
}
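pextrb/pextrw zero-extend the lane into the 32-bit destination register, which is why only the _s variants append a movsxbl/movsxwl. A scalar model of the i8x16 pair (illustrative C++ only, names are ours):

#include <cstdint>

// i8x16_extract_lane_u: pextrb zero-extends the byte into 32 bits.
uint32_t ExtractLaneU8(uint8_t lane) { return lane; }

// i8x16_extract_lane_s: same extraction, then movsxbl reinterprets the low
// byte as signed and sign-extends it.
int32_t ExtractLaneS8(uint8_t lane) { return static_cast<int8_t>(lane); }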
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
@@ -2286,120 +2325,81 @@ void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
  }
}
void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrq(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrq(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  }
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    if (imm_lane_idx == 0) {
      vpblendw(dst.fp(), src1.fp(), src2.fp(), 0b00001111);
    } else {
      vmovlhps(dst.fp(), src1.fp(), src2.fp());
    }
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    if (imm_lane_idx == 0) {
      pblendw(dst.fp(), src2.fp(), 0b00001111);
    } else {
      movlhps(dst.fp(), src2.fp());
    }
  }
}
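Lane 1 of an f64x2 register is its high qword, so both paths above reduce to a blend: pblendw with mask 0b00001111 takes the low four 16-bit words (lane 0) from src2, while movlhps copies src2's low qword into dst's high qword (lane 1). A scalar model (illustrative C++ only, not part of this change):

#include <cstdint>

struct F64x2 {
  double lane[2];  // lane[1] is the register's high qword.
};

// Both instruction paths implement: keep src1, overwrite the selected lane
// with the double sitting in src2's lane 0.
F64x2 ReplaceLaneF64x2(F64x2 src1, F64x2 src2, uint8_t imm_lane_idx) {
  F64x2 dst = src1;
  dst.lane[imm_lane_idx] = src2.lane[0];
  return dst;
}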
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
...