Commit 6b228044 authored by Ng Zhi An, committed by Commit Bot

Reland "[wasm-simd][liftoff][x64][ia32] Implement load extend"

This is a reland of dd19a400

Original change's description:
> [wasm-simd][liftoff][x64][ia32] Implement load extend
> 
> The operations are implemented:
> 
> - i16x8.load8x8_s
> - i16x8.load8x8_u
> - i32x4.load16x4_s
> - i32x4.load16x4_u
> - i64x2.load32x2_s
> - i64x2.load32x2_u
> 
> on x64 and ia32. The remaining architectures currently bail out, and will be
> implemented in subsequent patches.
> 
> The liftoff-compiler.cc code looks very similar to the one for LoadMem;
> the only difference is the special handling of kSplat vs. kExtend. kExtend
> always loads 8 bytes, so the bounds check and tracing are different.
> Compared to LoadMem there is less need for pinning, since the result is
> always going to be in a SIMD/FP register, which is different from the
> index/addr register.
> 
> The enum LoadTransformationKind was moved from
> function-body-decoder-impl.h to function-body-decoder.h so that no
> unnecessary header file inclusions are needed in Liftoff, and also it's
> a better place for it to live.
> 
> Bug: v8:9909
> Change-Id: I926bcc01c0c3c860223e8c08f91bc4ab3b75c399
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2203730
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#67914}

Bug: v8:9909
Change-Id: Ic1d8dcc00d9c5af0d51100a947161eaa315b7659
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2209268
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67930}
parent 3b1d24cf
......@@ -2137,6 +2137,14 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
}
}
// SIMD load-transform is not implemented by this assembler backend yet.
// Every request records a kSimd bailout; no load is emitted and
// *protected_load_pc is left untouched.
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uint32_t offset_imm,
                                     LoadType type,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc) {
  bailout(kSimd, "Load transform unimplemented");
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
TurboAssembler::Move(dst.low_fp(), src.fp());
......
......@@ -1163,6 +1163,14 @@ void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
}
}
// Placeholder for the SIMD load-transform on this assembler backend: the
// operation is unimplemented, so the only effect is a kSimd bailout. None of
// the parameters are read and *protected_load_pc is not written.
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uint32_t offset_imm,
                                     LoadType type,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc) {
  bailout(kSimd, "Load transform unimplemented");
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V2D(), src.fp().D(), 0);
......
......@@ -2127,6 +2127,37 @@ void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst,
}
} // namespace liftoff
// Emits a SIMD load-transform from the memory operand
// [src_addr + offset_reg * 1 + offset_imm] into dst.fp().
//
// The current code offset is stored in *protected_load_pc before anything is
// emitted, so it identifies the load instruction that follows.
//
// Only LoadTransformationKind::kExtend is implemented: the instruction is
// selected from type.mem_type(), using the Pmovsx* forms for signed lane
// types and the Pmovzx* forms for unsigned ones. kSplat records a kSimd
// bailout instead.
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uint32_t offset_imm,
                                     LoadType type,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc) {
  // The immediate is narrowed to int32_t for the operand's displacement.
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  Operand src_op{src_addr, offset_reg, times_1,
                 static_cast<int32_t>(offset_imm)};
  *protected_load_pc = pc_offset();

  // Anything other than an extend must be a splat, which is not handled yet.
  if (transform != LoadTransformationKind::kExtend) {
    DCHECK_EQ(LoadTransformationKind::kSplat, transform);
    bailout(kSimd, "load splats unimplemented");
    return;
  }

  MachineType lane_type = type.mem_type();
  if (lane_type == MachineType::Int8()) {
    Pmovsxbw(dst.fp(), src_op);
  } else if (lane_type == MachineType::Uint8()) {
    Pmovzxbw(dst.fp(), src_op);
  } else if (lane_type == MachineType::Int16()) {
    Pmovsxwd(dst.fp(), src_op);
  } else if (lane_type == MachineType::Uint16()) {
    Pmovzxwd(dst.fp(), src_op);
  } else if (lane_type == MachineType::Int32()) {
    Pmovsxdq(dst.fp(), src_op);
  } else if (lane_type == MachineType::Uint32()) {
    Pmovzxdq(dst.fp(), src_op);
  }
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......
......@@ -742,6 +742,11 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_f64_set_cond(Condition condition, Register dst,
DoubleRegister lhs, DoubleRegister rhs);
// Platform-specific SIMD load-transform. Loads from the address formed by
// src_addr, offset_reg and offset_imm into dst, applying `transform`
// (kSplat or kExtend) for the given load `type`. Implementations that emit a
// load store its code offset in *protected_load_pc; unimplemented backends
// bail out with kSimd instead.
inline void LoadTransform(LiftoffRegister dst, Register src_addr,
Register offset_reg, uint32_t offset_imm,
LoadType type, LoadTransformationKind transform,
uint32_t* protected_load_pc);
inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src);
......
......@@ -8,6 +8,7 @@
#include "src/codegen/assembler-inl.h"
// TODO(clemensb): Remove dependences on compiler stuff.
#include "src/codegen/interface-descriptors.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/macro-assembler-inl.h"
#include "src/compiler/linkage.h"
#include "src/compiler/wasm-compiler.h"
......@@ -2042,11 +2043,54 @@ class LiftoffCompiler {
offset, decoder->position());
}
}
// Compiles a SIMD load-transform (load splat / load extend) memory access:
// pops the index from the value stack, bounds-checks the access, emits the
// assembler's LoadTransform, and pushes the resulting kS128 value.
// `index_val` and `result` are not read here; the operands live on the
// Liftoff value stack.
void LoadTransform(FullDecoder* decoder, LoadType type,
LoadTransformationKind transform,
const MemoryAccessImmediate<validate>& imm,
const Value& index_val, Value* result) {
// NOTE(review): this call looks like the pre-change stub body that this diff
// replaces (the +/- markers are not visible in this view). Confirm it is not
// meant to remain ahead of the implementation below.
unsupported(decoder, kSimd, "simd");
// LoadTransform requires SIMD support, so check for it here. If
// unsupported, bailout and let TurboFan lower the code.
if (!CheckSupportedType(decoder, kSupportedTypes, kWasmS128,
"LoadTransform")) {
return;
}
// Pin the index register so the allocations below cannot hand it out again.
LiftoffRegList pinned;
Register index = pinned.set(__ PopToRegister()).gp();
// For load splats, LoadType is the size of the load, and for load
// extends, LoadType is the size of the lane, and it always loads 8 bytes.
uint32_t access_size =
transform == LoadTransformationKind::kExtend ? 8 : type.size();
// Nothing more is emitted for this access when BoundsCheckMem returns true.
if (BoundsCheckMem(decoder, access_size, imm.offset, index, pinned,
kDontForceCheck)) {
return;
}
// AddMemoryMasking may return a different index register and may update
// `offset` and `pinned` through the pointers passed in.
uint32_t offset = imm.offset;
index = AddMemoryMasking(index, &offset, &pinned);
DEBUG_CODE_COMMENT("LoadTransform from memory");
// `addr` receives the instance's MemoryStart field (the memory base).
Register addr = __ GetUnusedRegister(kGpReg, pinned).gp();
LOAD_INSTANCE_FIELD(addr, MemoryStart, kSystemPointerSize);
// The result register is requested with an empty pinned list; it comes from
// the register class for kS128, while index/addr were obtained as GP
// registers.
LiftoffRegister value = __ GetUnusedRegister(reg_class_for(kS128), {});
// Filled in by the assembler with the code offset of the emitted load.
uint32_t protected_load_pc = 0;
__ LoadTransform(value, addr, index, offset, type, transform,
&protected_load_pc);
// With the trap handler enabled, register an out-of-line
// kThrowWasmTrapMemOutOfBounds trap associated with the load's pc.
if (env_->use_trap_handler) {
AddOutOfLineTrap(decoder->position(),
WasmCode::kThrowWasmTrapMemOutOfBounds,
protected_load_pc);
}
__ PushRegister(ValueType{kS128}, value);
if (FLAG_trace_wasm_memory) {
// Again load extend is different.
// An extend is traced as a 64-bit access (it always loads 8 bytes); a splat
// is traced with the representation of its memory type.
MachineRepresentation mem_rep =
transform == LoadTransformationKind::kExtend
? MachineRepresentation::kWord64
: type.mem_type().representation();
TraceMemoryOperation(false, mem_rep, index, offset, decoder->position());
}
}
void StoreMem(FullDecoder* decoder, StoreType type,
......
......@@ -8,7 +8,6 @@
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/codegen/assembler.h"
#include "src/wasm/value-type.h"
namespace v8 {
namespace internal {
......@@ -2174,6 +2173,37 @@ void EmitI64x2ShrS(LiftoffAssembler* assm, LiftoffRegister dst,
} // namespace liftoff
// Emits a SIMD load-transform from the memory operand that
// liftoff::GetMemOp builds out of src_addr, offset_reg and offset_imm, with
// the result placed in dst.fp().
//
// The current code offset is stored in *protected_load_pc before anything is
// emitted, so it identifies the load instruction that follows.
//
// Only LoadTransformationKind::kExtend is implemented: the instruction is
// selected from type.mem_type(), using the Pmovsx* forms for signed lane
// types and the Pmovzx* forms for unsigned ones. kSplat records a kSimd
// bailout instead.
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uint32_t offset_imm,
                                     LoadType type,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc) {
  // Debug-code-only sanity check on the offset register, when there is one.
  if (emit_debug_code() && offset_reg != no_reg) {
    AssertZeroExtended(offset_reg);
  }
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
  *protected_load_pc = pc_offset();

  // Anything other than an extend must be a splat, which is not handled yet.
  if (transform != LoadTransformationKind::kExtend) {
    DCHECK_EQ(LoadTransformationKind::kSplat, transform);
    bailout(kSimd, "load splats unimplemented");
    return;
  }

  MachineType lane_type = type.mem_type();
  if (lane_type == MachineType::Int8()) {
    Pmovsxbw(dst.fp(), src_op);
  } else if (lane_type == MachineType::Uint8()) {
    Pmovzxbw(dst.fp(), src_op);
  } else if (lane_type == MachineType::Int16()) {
    Pmovsxwd(dst.fp(), src_op);
  } else if (lane_type == MachineType::Uint16()) {
    Pmovzxwd(dst.fp(), src_op);
  } else if (lane_type == MachineType::Int32()) {
    Pmovsxdq(dst.fp(), src_op);
  } else if (lane_type == MachineType::Uint32()) {
    Pmovzxdq(dst.fp(), src_op);
  }
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movd(dst.fp(), src.gp());
......
......@@ -805,11 +805,6 @@ struct ControlBase {
}
};
// Kind of transformation applied by a SIMD load-transform operation.
enum class LoadTransformationKind : uint8_t {
// Load splat.
kSplat,
// Load extend.
kExtend,
};
// This is the list of callback functions that an interface for the
// WasmFullDecoder should implement.
// F(Name, args...)
......
......@@ -34,6 +34,8 @@ struct FunctionBody {
: sig(sig), offset(offset), start(start), end(end) {}
};
// Kind of transformation applied by a SIMD load-transform operation:
// kSplat is a load splat, kExtend is a load extend.
enum class LoadTransformationKind : uint8_t { kSplat, kExtend };
V8_EXPORT_PRIVATE DecodeResult VerifyWasmCode(AccountingAllocator* allocator,
const WasmFeatures& enabled,
const WasmModule* module,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment