Commit 6b6c1b4f, authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][liftoff][arm][arm64] Implement replace_lane

All 6 replace_lane operations for ARM and ARM64.

Changes to wasm-compiler required for ARM (where register aliasing is
different - 2 fp registers map to 1 simd register). When src2_rc is
kFpReg, and src1_rc is kFpRegPair, we still need to pin src2 when
getting a register for src1, since the registers can overlap. (On other
backends, src1_rc == src2_rc == kFpReg, so the current condition is met,
and we correctly pin src2). Same for result_rc and src2_rc.

Bug: v8:9909
Change-Id: Ib39b71534290a7109fb0da4bf62dabc66460a0dd
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2117637
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66893}
parent 12e3c846
......@@ -1570,7 +1570,9 @@ void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "f64x2replacelane");
ReplaceLane(liftoff::GetSimd128Register(dst.low_fp()),
liftoff::GetSimd128Register(src1.low_fp()), src2.fp(),
imm_lane_idx);
}
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1601,7 +1603,9 @@ void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "f32x4replacelane");
ReplaceLane(liftoff::GetSimd128Register(dst.low_fp()),
liftoff::GetSimd128Register(src1.low_fp()),
liftoff::GetFloatRegister(src2.fp()), imm_lane_idx);
}
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1639,7 +1643,11 @@ void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "i64x2replacelane");
Simd128Register dst_simd = liftoff::GetSimd128Register(dst.low_fp());
Simd128Register src1_simd = liftoff::GetSimd128Register(src1.low_fp());
ReplaceLane(dst_simd, src1_simd, src2.low_gp(), NeonS32, imm_lane_idx * 2);
ReplaceLane(dst_simd, dst_simd, src2.high_gp(), NeonS32,
imm_lane_idx * 2 + 1);
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1672,7 +1680,9 @@ void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "i32x4replacelane");
ReplaceLane(liftoff::GetSimd128Register(dst.low_fp()),
liftoff::GetSimd128Register(src1.low_fp()), src2.gp(), NeonS32,
imm_lane_idx);
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1726,7 +1736,9 @@ void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "i16x8replacelane");
ReplaceLane(liftoff::GetSimd128Register(dst.low_fp()),
liftoff::GetSimd128Register(src1.low_fp()), src2.gp(), NeonS16,
imm_lane_idx);
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
......@@ -1752,7 +1764,9 @@ void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "i8x16replacelane");
ReplaceLane(liftoff::GetSimd128Register(dst.low_fp()),
liftoff::GetSimd128Register(src1.low_fp()), src2.gp(), NeonS8,
imm_lane_idx);
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
......
......@@ -1099,7 +1099,10 @@ void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "f64x2replacelane");
if (dst != src1) {
Mov(dst.fp().V2D(), src1.fp().V2D());
}
Mov(dst.fp().V2D(), imm_lane_idx, src2.fp().V2D(), 0);
}
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1127,7 +1130,10 @@ void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "f32x4replacelane");
if (dst != src1) {
Mov(dst.fp().V4S(), src1.fp().V4S());
}
Mov(dst.fp().V4S(), imm_lane_idx, src2.fp().V4S(), 0);
}
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1155,7 +1161,10 @@ void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "i64x2replacelane");
if (dst != src1) {
Mov(dst.fp().V2D(), src1.fp().V2D());
}
Mov(dst.fp().V2D(), imm_lane_idx, src2.gp().X());
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1183,7 +1192,10 @@ void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "i32x4replacelane");
if (dst != src1) {
Mov(dst.fp().V4S(), src1.fp().V4S());
}
Mov(dst.fp().V4S(), imm_lane_idx, src2.gp().W());
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1217,7 +1229,10 @@ void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "i16x8replacelane");
if (dst != src1) {
Mov(dst.fp().V8H(), src1.fp().V8H());
}
Mov(dst.fp().V8H(), imm_lane_idx, src2.gp().W());
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -1251,7 +1266,10 @@ void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
bailout(kSimd, "i8x16replacelane");
if (dst != src1) {
Mov(dst.fp().V16B(), src1.fp().V16B());
}
Mov(dst.fp().V16B(), imm_lane_idx, src2.gp().W());
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
......
......@@ -2427,13 +2427,19 @@ class LiftoffCompiler {
static constexpr RegClass src1_rc = reg_class_for(ValueType::kS128);
static constexpr RegClass src2_rc = reg_class_for(src2_type);
static constexpr RegClass result_rc = reg_class_for(ValueType::kS128);
// On backends which need fp pair, src1_rc and result_rc end up being
// kFpRegPair, which is != kFpReg, but we still want to pin src2 when it is
// kFpReg, since it can overlap with those pairs.
static constexpr bool pin_src2 = kNeedS128RegPair && src2_rc == kFpReg;
// Does not work for arm
LiftoffRegister src2 = __ PopToRegister();
LiftoffRegister src1 = src1_rc == src2_rc
LiftoffRegister src1 = (src1_rc == src2_rc || pin_src2)
? __ PopToRegister(LiftoffRegList::ForRegs(src2))
: __
PopToRegister();
LiftoffRegister dst =
src2_rc == result_rc
(src2_rc == result_rc || pin_src2)
? __ GetUnusedRegister(result_rc, {src1},
LiftoffRegList::ForRegs(src2))
: __ GetUnusedRegister(result_rc, {src1});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment