Commit f7efe45b authored by Manos Koukoutos, committed by V8 LUCI CQ

[wasm-gc] Improve performance of array.copy in TF

We use BuildCCall over CallBuiltin. This improves the performance of
array.copy by up to 2x for small arrays.

Bug: v8:7748
Change-Id: Ibbd6a69267edb229beda1f6de4ff1c48eb38b729
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3135580
Commit-Queue: Manos Koukoutos <manoskouk@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76661}
parent b9f282a9
......@@ -369,16 +369,6 @@ builtin WasmArrayCopyWithChecks(
SmiFromUint32(srcIndex), SmiFromUint32(length));
}
// Thin builtin wrapper that tail-calls the WasmArrayCopy runtime function.
// The uint32 parameters are declared ahead of the array parameters so that
// they are assigned to registers.
builtin WasmArrayCopy(
    dstIndex: uint32, srcIndex: uint32, length: uint32, dstArray: WasmArray,
    srcArray: WasmArray): JSAny {
  const context = LoadContextFromFrame();
  // The indices and the length are handed to the runtime function as Smis.
  const dstIndexSmi = SmiFromUint32(dstIndex);
  const srcIndexSmi = SmiFromUint32(srcIndex);
  const lengthSmi = SmiFromUint32(length);
  tail runtime::WasmArrayCopy(
      context, dstArray, dstIndexSmi, srcArray, srcIndexSmi, lengthSmi);
}
// Redeclaration with different typing (value is an Object, not JSAny).
extern transitioning runtime
CreateDataProperty(implicit context: Context)(JSReceiver, JSAny, Object);
......
......@@ -400,6 +400,7 @@ IF_WASM(FUNCTION_REFERENCE, wasm_memory_fill, wasm::memory_fill_wrapper)
IF_WASM(FUNCTION_REFERENCE, wasm_float64_pow, wasm::float64_pow_wrapper)
IF_WASM(FUNCTION_REFERENCE, wasm_call_trap_callback_for_testing,
wasm::call_trap_callback_for_testing)
IF_WASM(FUNCTION_REFERENCE, wasm_array_copy, wasm::array_copy_wrapper)
static void f64_acos_wrapper(Address data) {
double input = ReadUnalignedValue<double>(data);
......
......@@ -247,6 +247,7 @@ class StatsCounter;
IF_WASM(V, wasm_memory_init, "wasm::memory_init") \
IF_WASM(V, wasm_memory_copy, "wasm::memory_copy") \
IF_WASM(V, wasm_memory_fill, "wasm::memory_fill") \
IF_WASM(V, wasm_array_copy, "wasm::array_copy") \
V(address_of_wasm_f64x2_convert_low_i32x4_u_int_mask, \
"wasm_f64x2_convert_low_i32x4_u_int_mask") \
V(supports_wasm_simd_128_address, "wasm::supports_wasm_simd_128_address") \
......
......@@ -5964,24 +5964,33 @@ Node* WasmGraphBuilder::ArrayLen(Node* array_object, CheckForNull null_check,
return gasm_->LoadWasmArrayLength(array_object);
}
// TODO(7748): Change {CallBuiltin} to {BuildCCall}. Add an option to copy in a
// loop for small array sizes. To find the length limit, run
// test/mjsunit/wasm/array-copy-benchmark.js.
// TODO(7748): Add an option to copy in a loop for small array sizes. To find
// the length limit, run test/mjsunit/wasm/array-copy-benchmark.js.
// Emits the graph for array.copy: null checks on the destination and source
// arrays, bounds checks on both index ranges, and then the call that performs
// the copy.
// NOTE(review): this listing is a rendered diff, so lines from before and
// after the change appear together -- the parameter list repeats
// {src_array}/{src_index}/{length}, the null checks appear both unconditionally
// and guarded by {CheckForNull}, and both the CallBuiltin and the BuildCCall
// paths are shown.
void WasmGraphBuilder::ArrayCopy(Node* dst_array, Node* dst_index,
Node* src_array, Node* src_index, Node* length,
CheckForNull dst_null_check, Node* src_array,
Node* src_index, CheckForNull src_null_check,
Node* length,
wasm::WasmCodePosition position) {
// TODO(7748): Skip null checks when possible.
TrapIfTrue(wasm::kTrapNullDereference, gasm_->WordEqual(dst_array, RefNull()),
position);
TrapIfTrue(wasm::kTrapNullDereference, gasm_->WordEqual(src_array, RefNull()),
position);
// Trap with kTrapNullDereference on a null array, but only when the caller
// requested a null check for that operand.
if (dst_null_check == kWithNullCheck) {
TrapIfTrue(wasm::kTrapNullDereference,
gasm_->WordEqual(dst_array, RefNull()), position);
}
if (src_null_check == kWithNullCheck) {
TrapIfTrue(wasm::kTrapNullDereference,
gasm_->WordEqual(src_array, RefNull()), position);
}
// Both the destination and the source range are bounds-checked before the
// copy is emitted.
BoundsCheckArrayCopy(dst_array, dst_index, length, position);
BoundsCheckArrayCopy(src_array, src_index, length, position);
Operator::Properties copy_properties =
Operator::kIdempotent | Operator::kNoThrow | Operator::kNoDeopt;
// The builtin needs the int parameters first.
gasm_->CallBuiltin(Builtin::kWasmArrayCopy, copy_properties, dst_index,
src_index, length, dst_array, src_array);
// Call the C function behind the wasm_array_copy external reference directly.
Node* function =
gasm_->ExternalConstant(ExternalReference::wasm_array_copy());
// Machine types of the six call arguments, in the order passed to BuildCCall
// below: instance, dst array, dst index, src array, src index, length.
MachineType arg_types[]{
MachineType::TaggedPointer(), MachineType::TaggedPointer(),
MachineType::Uint32(), MachineType::TaggedPointer(),
MachineType::Uint32(), MachineType::Uint32()};
// Zero return values, six parameters.
MachineSignature sig(0, 6, arg_types);
BuildCCall(&sig, function, GetInstance(), dst_array, dst_index, src_array,
src_index, length);
}
// 1 bit V8 Smi tag, 31 bits V8 Smi shift, 1 bit i31ref high-bit truncation.
......
......@@ -469,9 +469,9 @@ class WasmGraphBuilder {
wasm::WasmCodePosition position);
Node* ArrayLen(Node* array_object, CheckForNull null_check,
wasm::WasmCodePosition position);
void ArrayCopy(Node* dst_array, Node* dst_index, Node* src_array,
Node* src_index, Node* length,
wasm::WasmCodePosition position);
void ArrayCopy(Node* dst_array, Node* dst_index, CheckForNull dst_null_check,
Node* src_array, Node* src_index, CheckForNull src_null_check,
Node* length, wasm::WasmCodePosition position);
Node* I31New(Node* input);
Node* I31GetS(Node* input);
Node* I31GetU(Node* input);
......
......@@ -5133,6 +5133,8 @@ class LiftoffCompiler {
void ArrayCopy(FullDecoder* decoder, const Value& dst, const Value& dst_index,
const Value& src, const Value& src_index,
const Value& length) {
// TODO(7748): Unify implementation with TF: Implement this with
// GenerateCCall. Remove runtime function and builtin in wasm.tq.
CallRuntimeStub(WasmCode::kWasmArrayCopyWithChecks,
MakeSig::Params(kI32, kI32, kI32, kOptRef, kOptRef),
// Builtin parameter order:
......
......@@ -977,7 +977,8 @@ class WasmGraphBuildingInterface {
// Decoder callback for array.copy: forwards the operand nodes and the current
// decoder position to the graph builder. The null-check mode for each array
// is derived from that operand's type via NullCheckFor.
// NOTE(review): this listing is a rendered diff; the first
// `builder_->ArrayCopy(` line below is the pre-change call and the following
// one is its replacement.
void ArrayCopy(FullDecoder* decoder, const Value& dst, const Value& dst_index,
const Value& src, const Value& src_index,
const Value& length) {
builder_->ArrayCopy(dst.node, dst_index.node, src.node, src_index.node,
builder_->ArrayCopy(dst.node, dst_index.node, NullCheckFor(dst.type),
src.node, src_index.node, NullCheckFor(src.type),
length.node, decoder->position());
}
......
......@@ -117,7 +117,6 @@ struct WasmModule;
V(WasmAllocateArray_Uninitialized) \
V(WasmAllocateArray_InitNull) \
V(WasmAllocateArray_InitZero) \
V(WasmArrayCopy) \
V(WasmArrayCopyWithChecks) \
V(WasmAllocateRtt) \
V(WasmAllocateFreshRtt) \
......
......@@ -537,6 +537,53 @@ int32_t memory_fill_wrapper(Address data) {
return kSuccess;
}
namespace {
// Returns the raw address of element {index} of {array}, where each element
// occupies {element_size_bytes} bytes. The address is formed from the array's
// tagged pointer by adding the header size, removing the heap-object tag, and
// adding the element offset.
inline void* ArrayElementAddress(WasmArray array, uint32_t index,
                                 int element_size_bytes) {
  // Widen before multiplying so the byte offset is computed at pointer width
  // instead of being truncated to 32 bits first.
  const size_t element_offset =
      static_cast<size_t>(index) * static_cast<size_t>(element_size_bytes);
  return reinterpret_cast<void*>(array.ptr() + WasmArray::kHeaderSize -
                                 kHeapObjectTag + element_offset);
}
}  // namespace
// Copies {length} elements from {src_array}, starting at {src_index}, into
// {dst_array}, starting at {dst_index}. This is the C function behind the
// wasm_array_copy external reference.
//
// - {raw_instance}: tagged WasmInstanceObject; only used to reach the isolate
//   (and its heap) when the elements are references.
// - {raw_dst_array}, {raw_src_array}: tagged WasmArray pointers. The element
//   type is taken from the source array.
//
// No null or bounds checks are performed here.
// NOTE(review): callers are presumably expected to have done both before
// calling; confirm against every call site.
void array_copy_wrapper(Address raw_instance, Address raw_dst_array,
                        uint32_t dst_index, Address raw_src_array,
                        uint32_t src_index, uint32_t length) {
  // Copying zero elements is a no-op. Returning early also keeps empty ranges
  // away from the heap range helpers below.
  // NOTE(review): Heap::CopyRange is believed to DCHECK a non-zero length;
  // confirm against heap.cc.
  if (length == 0) return;

  ThreadNotInWasmScope thread_not_in_wasm_scope;
  DisallowGarbageCollection no_gc;
  WasmArray dst_array = WasmArray::cast(Object(raw_dst_array));
  WasmArray src_array = WasmArray::cast(Object(raw_src_array));

  // Ranges can only overlap when source and destination are the same array.
  // The range ends are computed in 64 bits so that the comparison cannot be
  // affected by 32-bit wraparound.
  const uint64_t dst_end = uint64_t{dst_index} + length;
  const uint64_t src_end = uint64_t{src_index} + length;
  bool overlapping_ranges =
      dst_array.ptr() == src_array.ptr() &&
      (dst_index < src_index ? dst_end > src_index : src_end > dst_index);

  wasm::ValueType element_type = src_array.type()->element_type();
  if (element_type.is_reference()) {
    // Reference elements are copied slot by slot through the heap so that the
    // write barrier is applied (UPDATE_WRITE_BARRIER).
    WasmInstanceObject instance =
        WasmInstanceObject::cast(Object(raw_instance));
    Isolate* isolate = Isolate::FromRootAddress(instance.isolate_root());
    ObjectSlot dst_slot = dst_array.ElementSlot(dst_index);
    ObjectSlot src_slot = src_array.ElementSlot(src_index);
    if (overlapping_ranges) {
      isolate->heap()->MoveRange(dst_array, dst_slot, src_slot, length,
                                 UPDATE_WRITE_BARRIER);
    } else {
      isolate->heap()->CopyRange(dst_array, dst_slot, src_slot, length,
                                 UPDATE_WRITE_BARRIER);
    }
  } else {
    // Non-reference elements are copied as raw bytes.
    int element_size_bytes = element_type.element_size_bytes();
    void* dst = ArrayElementAddress(dst_array, dst_index, element_size_bytes);
    void* src = ArrayElementAddress(src_array, src_index, element_size_bytes);
    // Widen before multiplying so the byte count is not truncated to 32 bits.
    size_t copy_size = static_cast<size_t>(length) *
                       static_cast<size_t>(element_size_bytes);
    if (overlapping_ranges) {
      MemMove(dst, src, copy_size);
    } else {
      MemCopy(dst, src, copy_size);
    }
  }
}
static WasmTrapCallbackForTesting wasm_trap_callback_for_testing = nullptr;
void set_trap_callback_for_testing(WasmTrapCallbackForTesting callback) {
......
......@@ -111,6 +111,10 @@ int32_t memory_copy_wrapper(Address data);
// zero-extend the result in the return register.
int32_t memory_fill_wrapper(Address data);
// Copies {length} elements from the WasmArray at {raw_src_array}, starting at
// {src_index}, to the WasmArray at {raw_dst_array}, starting at {dst_index}.
// Overlapping ranges within the same array are handled. Reference-typed
// elements are copied through the heap with write barriers; all other element
// types are copied as raw bytes. {raw_instance} is the tagged
// WasmInstanceObject used to reach the isolate. The function itself performs
// no null or bounds checks.
void array_copy_wrapper(Address raw_instance, Address raw_dst_array,
uint32_t dst_index, Address raw_src_array,
uint32_t src_index, uint32_t length);
using WasmTrapCallbackForTesting = void (*)();
V8_EXPORT_PRIVATE void set_trap_callback_for_testing(
......
......@@ -14,11 +14,10 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
// - Change the value of {length} to find point at which the builtin becomes
// faster.
// - Change {array_type} if you want to test different types.
// Right now, the limit is found to be in the 25-30 range.
// TODO(7748): Measure again if we implement array.copy with a fast C call.
// Right now, the limit is found to be around 10.
(function ArrayCopyBenchmark() {
let array_length = 27;
let array_length = 10;
let iterations = 1;
var builder = new WasmModuleBuilder();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment