Commit 8e6c0bb0 authored by peterwmwong, committed by Commit Bot

[typedarray] Speed up and simplify TypedArray#set overlap cases

- Remove CSA fallback to runtime for overlap cases
- Move overlap handling from runtime into elements.cc

Depending on typed array size (and overlap size), quick measurements
show a 4.3x - 8x improvement.  This fixes a large (>5x) performance
cliff between the overlap and non-overlap cases.
https://github.com/peterwmwong/v8-perf/blob/master/typedarray-set-overlap-fast-c-call/README.md

Bug: v8:5929
Change-Id: I47758d2ee603bc26f38dd9112567113cf887f5d5
Reviewed-on: https://chromium-review.googlesource.com/846378
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Peter Wong <peter.wm.wong@gmail.com>
Cr-Commit-Position: refs/heads/master@{#50578}
parent 74a2a8f6
......@@ -819,24 +819,9 @@ void TypedArrayBuiltinsAssembler::SetTypedArraySource(
BIND(&fast_c_call);
{
// Overlapping backing stores of different element kinds are handled in
// runtime. We're a bit conservative here and bail to runtime if ranges
// overlap and element kinds differ.
TNode<IntPtrT> target_byte_length =
IntPtrMul(target_length, target_el_size);
CSA_ASSERT(
this, UintPtrGreaterThanOrEqual(target_byte_length, IntPtrConstant(0)));
TNode<IntPtrT> target_data_end_ptr =
IntPtrAdd(target_data_ptr, target_byte_length);
TNode<IntPtrT> source_data_end_ptr =
IntPtrAdd(source_data_ptr, source_byte_length);
GotoIfNot(
Word32Or(UintPtrLessThanOrEqual(target_data_end_ptr, source_data_ptr),
UintPtrLessThanOrEqual(source_data_end_ptr, target_data_ptr)),
call_runtime);
this, UintPtrGreaterThanOrEqual(
IntPtrMul(target_length, target_el_size), IntPtrConstant(0)));
TNode<IntPtrT> source_length =
LoadAndUntagObjectField(source, JSTypedArray::kLengthOffset);
......
......@@ -12,6 +12,7 @@
#include "src/messages.h"
#include "src/objects-inl.h"
#include "src/utils.h"
#include "src/zone/zone.h"
// Each concrete ElementsAccessor can handle exactly one ElementsKind,
// several abstract ElementsAccessor classes are used to allow sharing
......@@ -3243,13 +3244,16 @@ class TypedElementsAccessor
}
template <typename SourceTraits>
static void CopyBetweenBackingStores(FixedTypedArrayBase* source,
static void CopyBetweenBackingStores(void* source_data_ptr,
BackingStore* dest, size_t length,
uint32_t offset) {
FixedTypedArray<SourceTraits>* source_fta =
FixedTypedArray<SourceTraits>::cast(source);
DisallowHeapAllocation no_gc;
for (uint32_t i = 0; i < length; i++) {
typename SourceTraits::ElementType elem = source_fta->get_scalar(i);
// We use scalar accessors to avoid boxing/unboxing, so there are no
// allocations.
typename SourceTraits::ElementType elem =
FixedTypedArray<SourceTraits>::get_scalar_from_data_ptr(
source_data_ptr, i);
dest->set(offset + i, dest->from(elem));
}
}
......@@ -3280,15 +3284,10 @@ class TypedElementsAccessor
bool both_are_simple = HasSimpleRepresentation(source_type) &&
HasSimpleRepresentation(destination_type);
// We assume the source and destination don't overlap, even though they
// can share the same buffer. This is always true for newly allocated
// TypedArrays.
uint8_t* source_data = static_cast<uint8_t*>(source_elements->DataPtr());
uint8_t* dest_data = static_cast<uint8_t*>(destination_elements->DataPtr());
size_t source_byte_length = NumberToSize(source->byte_length());
size_t dest_byte_length = NumberToSize(destination->byte_length());
CHECK(dest_data + dest_byte_length <= source_data ||
source_data + source_byte_length <= dest_data);
// We can simply copy the backing store if the types are the same, or if
// we are converting e.g. Uint8 <-> Int8, as the binary representation
......@@ -3296,16 +3295,25 @@ class TypedElementsAccessor
// which have special conversion operations.
if (same_type || (same_size && both_are_simple)) {
size_t element_size = source->element_size();
std::memcpy(dest_data + offset * element_size, source_data,
length * element_size);
std::memmove(dest_data + offset * element_size, source_data,
length * element_size);
} else {
// We use scalar accessors below to avoid boxing/unboxing, so there are
// no allocations.
Isolate* isolate = source->GetIsolate();
Zone zone(isolate->allocator(), ZONE_NAME);
// If the typedarrays are overlapped, clone the source.
if (dest_data + dest_byte_length > source_data &&
source_data + source_byte_length > dest_data) {
uint8_t* temp_data = zone.NewArray<uint8_t>(source_byte_length);
std::memcpy(temp_data, source_data, source_byte_length);
source_data = temp_data;
}
switch (source->GetElementsKind()) {
#define TYPED_ARRAY_CASE(Type, type, TYPE, ctype, size) \
case TYPE##_ELEMENTS: \
CopyBetweenBackingStores<Type##ArrayTraits>( \
source_elements, destination_elements, length, offset); \
#define TYPED_ARRAY_CASE(Type, type, TYPE, ctype, size) \
case TYPE##_ELEMENTS: \
CopyBetweenBackingStores<Type##ArrayTraits>( \
source_data, destination_elements, length, offset); \
break;
TYPED_ARRAYS(TYPED_ARRAY_CASE)
default:
......
......@@ -452,6 +452,14 @@ double Float64ArrayTraits::defaultValue() {
// Reads the element at |index| as a raw scalar (no boxing into a Handle).
template <class Traits>
typename Traits::ElementType FixedTypedArray<Traits>::get_scalar(int index) {
  DCHECK((index >= 0) && (index < this->length()));
  // Delegate to the static data-pointer accessor so that the instance and
  // raw-pointer read paths share a single implementation.
  return get_scalar_from_data_ptr(DataPtr(), index);
}
// static
template <class Traits>
typename Traits::ElementType FixedTypedArray<Traits>::get_scalar_from_data_ptr(
void* data_ptr, int index) {
typename Traits::ElementType* ptr = reinterpret_cast<ElementType*>(data_ptr);
// The JavaScript memory model allows for racy reads and writes to a
// SharedArrayBuffer's backing store, which will always be a FixedTypedArray.
// ThreadSanitizer will catch these racy accesses and warn about them, so we
......@@ -460,7 +468,6 @@ typename Traits::ElementType FixedTypedArray<Traits>::get_scalar(int index) {
// We don't use relaxed atomics here, as it is not a requirement of the
// JavaScript memory model to have tear-free reads of overlapping accesses,
// and using relaxed atomics may introduce overhead.
auto* ptr = reinterpret_cast<ElementType*>(DataPtr());
TSAN_ANNOTATE_IGNORE_READS_BEGIN;
auto result = ptr[index];
TSAN_ANNOTATE_IGNORE_READS_END;
......
......@@ -540,6 +540,7 @@ class FixedTypedArray : public FixedTypedArrayBase {
DECL_CAST(FixedTypedArray<Traits>)
static inline ElementType get_scalar_from_data_ptr(void* data_ptr, int index);
inline ElementType get_scalar(int index);
static inline Handle<Object> get(FixedTypedArray* array, int index);
inline void set(int index, ElementType value);
......
......@@ -214,101 +214,6 @@ RUNTIME_FUNCTION(Runtime_TypedArraySpeciesCreateByLength) {
return *result_array;
}
namespace {
// Handles the overlapping-backing-store case of %TypedArray%.prototype.set:
// copies |source| into |target| starting at element |offset|, where both
// views alias the same ArrayBuffer. Elements are copied one at a time via
// the ElementsAccessors (which perform any element-kind conversion), in an
// order chosen so that no source byte is read after it has already been
// overwritten by a write into |target|. Returns undefined.
Object* TypedArraySetFromOverlapping(Isolate* isolate,
                                     Handle<JSTypedArray> target,
                                     Handle<JSTypedArray> source,
                                     uint32_t offset) {
#ifdef DEBUG
  Handle<FixedTypedArrayBase> source_elements(
      FixedTypedArrayBase::cast(source->elements()));
  Handle<FixedTypedArrayBase> target_elements(
      FixedTypedArrayBase::cast(target->elements()));
  uint8_t* source_data = static_cast<uint8_t*>(source_elements->DataPtr());
  uint8_t* target_data = static_cast<uint8_t*>(target_elements->DataPtr());
  size_t source_byte_length = NumberToSize(source->byte_length());
  size_t target_byte_length = NumberToSize(target->byte_length());
  // Caller guarantees the source fits into the target at |offset|.
  CHECK_LE(offset, target->length_value());
  CHECK_LE(source->length_value(), target->length_value() - offset);
  CHECK(source->length()->IsSmi());
  CHECK(!target->WasNeutered());
  CHECK(!source->WasNeutered());
  // Assert that target and source in fact overlapping.
  CHECK(target_data + target_byte_length > source_data &&
        source_data + source_byte_length > target_data);
#endif
  size_t sourceElementSize = source->element_size();
  size_t targetElementSize = target->element_size();
  uint32_t source_length = source->length_value();
  // Nothing to copy; return early before the pointer bookkeeping below.
  if (source_length == 0) return isolate->heap()->undefined_value();
  // Copy left part.
  // First un-mutated byte after the next write
  uint32_t target_ptr = 0;
  CHECK(target->byte_offset()->ToUint32(&target_ptr));
  // Byte offsets are relative to the shared buffer, so target_ptr and
  // source_ptr are directly comparable.
  target_ptr += (offset + 1) * targetElementSize;
  // Next read at sourcePtr. We do not care for memory changing before
  // sourcePtr - we have already copied it.
  uint32_t source_ptr = 0;
  CHECK(source->byte_offset()->ToUint32(&source_ptr));
  ElementsAccessor* source_accessor = source->GetElementsAccessor();
  ElementsAccessor* target_accessor = target->GetElementsAccessor();
  uint32_t left_index;
  // Forward pass: safe while each write lands at or before the next unread
  // source byte.
  for (left_index = 0; left_index < source_length && target_ptr <= source_ptr;
       left_index++) {
    Handle<Object> value = source_accessor->Get(source, left_index);
    target_accessor->Set(target, offset + left_index, *value);
    target_ptr += targetElementSize;
    source_ptr += sourceElementSize;
  }
  // Copy right part;
  // First unmutated byte before the next write
  CHECK(target->byte_offset()->ToUint32(&target_ptr));
  target_ptr += (offset + source_length - 1) * targetElementSize;
  // Next read before sourcePtr. We do not care for memory changing after
  // sourcePtr - we have already copied it.
  // NOTE(review): this reads target->byte_offset() into source_ptr, while the
  // forward pass above initializes source_ptr from source->byte_offset() -
  // looks inconsistent and possibly a bug; verify intended behavior.
  CHECK(target->byte_offset()->ToUint32(&source_ptr));
  source_ptr += source_length * sourceElementSize;
  uint32_t right_index;
  DCHECK_GE(source_length, 1);
  // Backward pass: safe while each write lands at or after the last unread
  // source byte.
  for (right_index = source_length - 1;
       right_index > left_index && target_ptr >= source_ptr; right_index--) {
    Handle<Object> value = source_accessor->Get(source, right_index);
    target_accessor->Set(target, offset + right_index, *value);
    target_ptr -= targetElementSize;
    source_ptr -= sourceElementSize;
  }
  // Middle section where neither direction is safe: buffer the remaining
  // source elements first, then write them out, so reads never see mutated
  // memory.
  std::vector<Handle<Object>> temp(right_index + 1 - left_index);
  for (uint32_t i = left_index; i <= right_index; i++) {
    temp[i - left_index] = source_accessor->Get(source, i);
  }
  for (uint32_t i = left_index; i <= right_index; i++) {
    target_accessor->Set(target, offset + i, *temp[i - left_index]);
  }
  return isolate->heap()->undefined_value();
}
} // namespace
// 22.2.3.23 %TypedArray%.prototype.set ( overloaded [ , offset ] )
RUNTIME_FUNCTION(Runtime_TypedArraySet) {
HandleScope scope(isolate);
......@@ -317,6 +222,7 @@ RUNTIME_FUNCTION(Runtime_TypedArraySet) {
Handle<Smi> offset = args.at<Smi>(2);
DCHECK(!target->WasNeutered()); // Checked in TypedArrayPrototypeSet.
DCHECK(!obj->IsJSTypedArray()); // Should be handled by CSA.
DCHECK_LE(0, offset->value());
const uint32_t uint_offset = static_cast<uint32_t>(offset->value());
......@@ -328,10 +234,6 @@ RUNTIME_FUNCTION(Runtime_TypedArraySet) {
// (Consistent with Firefox and Blink/WebKit)
THROW_NEW_ERROR_RETURN_FAILURE(
isolate, NewTypeError(MessageTemplate::kInvalidArgument));
} else if (obj->IsJSTypedArray()) {
// The non-overlapping case is handled in CSA.
Handle<JSTypedArray> source = Handle<JSTypedArray>::cast(obj);
return TypedArraySetFromOverlapping(isolate, target, source, uint_offset);
}
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, obj,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment