Commit ec8b2472, authored by Andy Wingo and committed by V8 LUCI CQ

[stringrefs] Implement string.new_wtf16

Bug: v8:12868
Change-Id: Ied5e985db139517ec34446bcb8b16041e5d79af7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3663092
Commit-Queue: Andy Wingo <wingo@igalia.com>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80823}
parent 84d07481
......@@ -40,6 +40,8 @@ extern runtime WasmArrayInitFromData(
Context, WasmInstanceObject, Smi, Smi, Smi, Map): Object;
extern runtime WasmStringNewWtf8(
Context, WasmInstanceObject, Smi, Number, Number): String;
// Runtime function that the WasmStringNewWtf16 builtin tail-calls. The
// arguments are, in order: the context, the instance, the memory index as a
// Smi, and the offset and size as Numbers. Returns the new String.
extern runtime WasmStringNewWtf16(
Context, WasmInstanceObject, Smi, Number, Number): String;
}
namespace unsafe {
......@@ -787,4 +789,11 @@ builtin WasmStringNewWtf8(
LoadContextFromInstance(instance), instance, SmiFromUint32(memory),
WasmUint32ToNumber(offset), WasmUint32ToNumber(size));
}
// Builtin backing string.new_wtf16: converts the raw uint32 operands into
// tagged values and tail-calls the runtime function of the same name.
//   memory: index of the memory to read from (passed on as a Smi).
//   offset: start offset of the data inside that memory.
//   size:   size operand (the runtime function reads it as a count of
//           16-bit code units).
builtin WasmStringNewWtf16(
    memory: uint32, offset: uint32, size: uint32): String {
  const instance = LoadInstanceFromFrame();
  const context = LoadContextFromInstance(instance);
  const memoryIndex = SmiFromUint32(memory);
  // offset and size may not fit in a Smi, so they travel as Numbers.
  const offsetNumber = WasmUint32ToNumber(offset);
  const sizeNumber = WasmUint32ToNumber(size);
  tail runtime::WasmStringNewWtf16(
      context, instance, memoryIndex, offsetNumber, sizeNumber);
}
}
......@@ -5730,6 +5730,12 @@ Node* WasmGraphBuilder::StringNewWtf8(uint32_t memory, Node* offset,
gasm_->Uint32Constant(memory), offset, size);
}
// Emits a call to the WasmStringNewWtf16 builtin and returns the call node.
// |memory| is the static memory index and is materialized as a uint32
// constant; |offset| and |size| are forwarded unchanged.
Node* WasmGraphBuilder::StringNewWtf16(uint32_t memory, Node* offset,
                                       Node* size) {
  Node* memory_index = gasm_->Uint32Constant(memory);
  return gasm_->CallBuiltin(Builtin::kWasmStringNewWtf16, Operator::kNoDeopt,
                            memory_index, offset, size);
}
// 1 bit V8 Smi tag, 31 bits V8 Smi shift, 1 bit i31ref high-bit truncation.
constexpr int kI31To32BitSmiShift = 33;
......
......@@ -539,6 +539,7 @@ class WasmGraphBuilder {
Node** match_control, Node** match_effect,
Node** no_match_control, Node** no_match_effect);
Node* StringNewWtf8(uint32_t memory, Node* offset, Node* size);
Node* StringNewWtf16(uint32_t memory, Node* offset, Node* size);
Node* IsNull(Node* object);
Node* TypeGuard(Node* value, wasm::ValueType type);
......
......@@ -823,6 +823,56 @@ RUNTIME_FUNCTION(Runtime_WasmCreateResumePromise) {
return *result;
}
namespace {
// Creates a string from the WTF-8 bytes in |data|.
//
// Throws the kWasmTrapStringInvalidWtf8 wasm error if the decoder rejects the
// input. Otherwise returns the empty string for zero-length results, a
// single-character string for one-byte input of size 1, and a freshly
// allocated one-byte or two-byte sequential string in all other cases. A
// failed allocation is propagated as the pending exception.
Object NewStringFromWtf8(Isolate* isolate,
                         const base::Vector<const uint8_t>& data) {
  wasm::Wtf8Decoder decoder(data);
  if (!decoder.is_valid()) {
    return ThrowWasmError(isolate, MessageTemplate::kWasmTrapStringInvalidWtf8);
  }

  auto* factory = isolate->factory();
  const auto utf16_length = decoder.utf16_length();
  if (utf16_length == 0) return *factory->empty_string();

  if (!decoder.is_one_byte()) {
    Handle<SeqTwoByteString> two_byte;
    // TODO(12868): Override any exception with an uncatchable-by-wasm trap.
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, two_byte, factory->NewRawTwoByteString(utf16_length));
    // The raw character pointer must not outlive a GC, so forbid one while
    // the decoder writes into the string.
    DisallowGarbageCollection no_gc;
    decoder.Decode(two_byte->GetChars(no_gc), data);
    return *two_byte;
  }

  // One-byte input consisting of a single byte: look up the
  // single-character string instead of allocating.
  if (data.size() == 1) {
    return *factory->LookupSingleCharacterStringFromCode(data[0]);
  }

  Handle<SeqOneByteString> one_byte;
  // TODO(12868): Override any exception with an uncatchable-by-wasm trap.
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, one_byte, factory->NewRawOneByteString(utf16_length));
  DisallowGarbageCollection no_gc;
  decoder.Decode(one_byte->GetChars(no_gc), data);
  return *one_byte;
}
// Creates a string from the 16-bit code units in |data|.
//
// On little-endian targets this hands |data| to
// Factory::NewStringFromTwoByte (young allocation) and returns either the
// resulting string or the failure sentinel. Big-endian targets are not
// implemented yet.
Object NewStringFromWtf16(Isolate* isolate,
                          const base::Vector<const base::uc16>& data) {
#if !defined(V8_TARGET_LITTLE_ENDIAN) && !defined(V8_TARGET_BIG_ENDIAN)
#error Unknown endianness
#endif

#if defined(V8_TARGET_LITTLE_ENDIAN)
  // TODO(12868): Override any exception with an uncatchable-by-wasm trap.
  RETURN_RESULT_OR_FAILURE(isolate, isolate->factory()->NewStringFromTwoByte(
                                        data, AllocationType::kYoung));
#else
  // TODO(12868): Duplicate the guts of NewStringFromTwoByte, so that
  // copying and transcoding the data can be done in a single pass.
  UNIMPLEMENTED();
#endif
}
} // namespace
// Returns the new string if the operation succeeds. Otherwise throws an
// exception and returns an empty result.
RUNTIME_FUNCTION(Runtime_WasmStringNewWtf8) {
......@@ -844,33 +894,33 @@ RUNTIME_FUNCTION(Runtime_WasmStringNewWtf8) {
const base::Vector<const uint8_t> bytes{instance->memory_start() + offset,
size};
wasm::Wtf8Decoder decoder(bytes);
if (!decoder.is_valid()) {
return ThrowWasmError(isolate, MessageTemplate::kWasmTrapStringInvalidWtf8);
}
return NewStringFromWtf8(isolate, bytes);
}
if (decoder.utf16_length() == 0) return *isolate->factory()->empty_string();
RUNTIME_FUNCTION(Runtime_WasmStringNewWtf16) {
ClearThreadInWasmScope flag_scope(isolate);
DCHECK_EQ(4, args.length());
HandleScope scope(isolate);
Handle<WasmInstanceObject> instance = args.at<WasmInstanceObject>(0);
uint32_t memory = args.positive_smi_value_at(1);
uint32_t offset = NumberToUint32(args[2]);
uint32_t size_in_codeunits = NumberToUint32(args[3]);
if (decoder.is_one_byte()) {
if (size == 1) {
return *isolate->factory()->LookupSingleCharacterStringFromCode(bytes[0]);
}
Handle<SeqOneByteString> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result,
isolate->factory()->NewRawOneByteString(decoder.utf16_length()));
DisallowGarbageCollection no_gc;
decoder.Decode(result->GetChars(no_gc), bytes);
return *result;
DCHECK_EQ(memory, 0);
USE(memory);
uint64_t mem_size = instance->memory_size();
if (size_in_codeunits > kMaxUInt32 / 2 ||
!base::IsInBounds<uint64_t>(offset, size_in_codeunits * 2, mem_size)) {
return ThrowWasmError(isolate, MessageTemplate::kWasmTrapMemOutOfBounds);
}
if (offset & 1) {
return ThrowWasmError(isolate, MessageTemplate::kWasmTrapUnalignedAccess);
}
Handle<SeqTwoByteString> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result,
isolate->factory()->NewRawTwoByteString(decoder.utf16_length()));
DisallowGarbageCollection no_gc;
decoder.Decode(result->GetChars(no_gc), bytes);
return *result;
const byte* bytes = instance->memory_start() + offset;
const base::uc16* codeunits = reinterpret_cast<const base::uc16*>(bytes);
return NewStringFromWtf16(isolate, {codeunits, size_in_codeunits});
}
} // namespace internal
......
......@@ -612,7 +612,8 @@ namespace internal {
F(WasmAllocateContinuation, 1, 1) \
F(WasmSyncStackLimit, 0, 1) \
F(WasmCreateResumePromise, 2, 1) \
F(WasmStringNewWtf8, 4, 1)
F(WasmStringNewWtf8, 4, 1) \
F(WasmStringNewWtf16, 4, 1)
#define FOR_EACH_INTRINSIC_WASM_TEST(F, I) \
F(DeserializeWasmModule, 2, 1) \
......
......@@ -6008,9 +6008,26 @@ class LiftoffCompiler {
void StringNewWtf16(FullDecoder* decoder,
const MemoryIndexImmediate<validate>& imm,
const Value& index, const Value& codeunits,
Value* result) {
UNIMPLEMENTED();
const Value& offset, const Value& size, Value* result) {
LiftoffRegList pinned;
LiftoffRegister memory_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
__ LoadConstant(memory_reg, WasmValue(static_cast<int32_t>(imm.index)));
LiftoffAssembler::VarState memory_var(kI32, memory_reg, 0);
CallRuntimeStub(WasmCode::kWasmStringNewWtf16,
MakeSig::Returns(kRef).Params(kI32, kI32, kI32),
{
memory_var,
__ cache_state()->stack_state.end()[-2], // offset
__ cache_state()->stack_state.end()[-1] // size
},
decoder->position());
__ cache_state()->stack_state.pop_back(2);
RegisterDebugSideTableEntry(decoder, DebugSideTableBuilder::kDidSpill);
LiftoffRegister result_reg(kReturnRegister0);
__ PushRegister(kRef, result_reg);
}
void StringConst(FullDecoder* decoder,
......
......@@ -1134,7 +1134,7 @@ struct ControlBase : public PcForErrors<validate> {
F(StringNewWtf8, const MemoryIndexImmediate<validate>& imm, \
const Value& offset, const Value& size, Value* result) \
F(StringNewWtf16, const MemoryIndexImmediate<validate>& imm, \
const Value& index, const Value& codeunits, Value* result) \
const Value& offset, const Value& size, Value* result) \
F(StringConst, const StringConstImmediate<validate>& imm, Value* result) \
F(StringMeasureUtf8, const Value& str, Value* result) \
F(StringMeasureWtf8, const Value& str, Value* result) \
......@@ -5160,10 +5160,10 @@ class WasmFullDecoder : public WasmDecoder<validate, decoding_mode> {
MemoryIndexImmediate<validate> imm(this, this->pc_ + opcode_length);
if (!this->Validate(this->pc_ + opcode_length, imm)) return 0;
ValueType addr_type = this->module_->is_memory64 ? kWasmI64 : kWasmI32;
Value addr = Peek(1, 0, addr_type);
Value codeunits = Peek(0, 1, kWasmI32);
Value offset = Peek(1, 0, addr_type);
Value size = Peek(0, 1, kWasmI32);
Value result = CreateValue(kWasmStringRef);
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringNewWtf16, imm, addr, codeunits,
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringNewWtf16, imm, offset, size,
&result);
Drop(2);
Push(result);
......
......@@ -1402,9 +1402,8 @@ class WasmGraphBuildingInterface {
void StringNewWtf16(FullDecoder* decoder,
const MemoryIndexImmediate<validate>& imm,
const Value& index, const Value& codeunits,
Value* result) {
UNIMPLEMENTED();
const Value& offset, const Value& size, Value* result) {
result->node = builder_->StringNewWtf16(imm.index, offset.node, size.node);
}
void StringConst(FullDecoder* decoder,
......
......@@ -125,7 +125,8 @@ struct WasmModule;
V(WasmSubtypeCheck) \
V(WasmOnStackReplace) \
V(WasmSuspend) \
V(WasmStringNewWtf8)
V(WasmStringNewWtf8) \
V(WasmStringNewWtf16)
// Sorted, disjoint and non-overlapping memory regions. A region is of the
// form [start, end). So there's no [start, end), [end, other_end),
......
......@@ -88,3 +88,54 @@ function makeWtf8TestDataSegment() {
WebAssembly.RuntimeError, "invalid WTF-8 string");
}
})();
// Encodes `str` as little-endian WTF-16 and returns the bytes as an array of
// numbers: two bytes per UTF-16 code unit, low byte first.
function encodeWtf16LE(str) {
  // Walk the string by index with charCodeAt rather than with the string
  // iterator: the iterator coalesces surrogate pairs into one code point,
  // whereas every individual code unit (including lone surrogates) is needed
  // here.
  const out = [];
  for (let i = 0; i < str.length; i++) {
    // Declared locally; the undeclared assignment used before created an
    // implicit global (and is a ReferenceError in strict mode).
    const codeunit = str.charCodeAt(i);
    out.push(codeunit & 0xff);
    out.push(codeunit >> 8);
  }
  return out;
}
// Builds the data segment for the string.new_wtf16 test.
//
// Returns { valid, data }:
//   valid: maps each test string to { offset, length }, where offset is the
//          byte position of its encoding inside the segment and length is
//          the string's length in code units.
//   data:  a Uint8Array holding the little-endian WTF-16 encodings of all
//          test strings, laid out back to back.
function makeWtf16TestDataSegment() {
  const strings = [
    '',
    'ascii',
    'latin \xa9 1',
    'two \ucccc byte',
    'surrogate \ud800\udc000 pair',
    'isolated \ud800 leading',
    'isolated \udc00 trailing'
  ];
  const bytes = [];
  const valid = {};
  for (const str of strings) {
    // Record where this string starts before appending its bytes.
    valid[str] = { offset: bytes.length, length: str.length };
    bytes.push(...encodeWtf16LE(str));
  }
  return { valid, data: Uint8Array.from(bytes) };
}
// Instantiates a module whose exported function wraps string.new_wtf16 on
// memory 0, then checks that every string stored in the data segment is read
// back unchanged from its recorded offset and length.
(function TestStringNewWtf16() {
  const segment = makeWtf16TestDataSegment();

  const builder = new WasmModuleBuilder();
  builder.addMemory(1, undefined, false, false);
  builder.addDataSegment(0, segment.data);

  // (offset, length) -> string, reading from memory index 0.
  const body = [
    kExprLocalGet, 0, kExprLocalGet, 1,
    kGCPrefix, kExprStringNewWtf16, 0
  ];
  builder.addFunction("string_new_wtf16", kSig_w_ii)
    .exportAs("string_new_wtf16")
    .addBody(body);

  const instance = builder.instantiate();
  for (const expected of Object.keys(segment.valid)) {
    const {offset, length} = segment.valid[expected];
    assertEquals(expected, instance.exports.string_new_wtf16(offset, length));
  }
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment