Commit bd149e9f authored by Andy Wingo, committed by V8 LUCI CQ

[stringrefs] Implement string.new_wtf8_array

See https://github.com/WebAssembly/stringref/issues/1.

Bug: v8:12868
Change-Id: Ic1c9b55b1ec35c32c79d9b2cb0be243b96fc4453
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3734807
Commit-Queue: Andy Wingo <wingo@igalia.com>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81444}
parent cf92762b
......@@ -40,6 +40,8 @@ extern runtime WasmArrayNewSegment(
Context, WasmInstanceObject, Smi, Smi, Smi, Map): Object;
extern runtime WasmStringNewWtf8(
Context, WasmInstanceObject, Smi, Smi, Number, Number): String;
extern runtime WasmStringNewWtf8Array(
Context, Smi, WasmArray, Smi, Smi): String;
extern runtime WasmStringNewWtf16(
Context, WasmInstanceObject, Smi, Number, Number): String;
extern runtime WasmStringConst(Context, WasmInstanceObject, Smi): String;
......@@ -804,6 +806,20 @@ builtin WasmStringNewWtf8(
LoadContextFromInstance(instance), instance, memory, policy,
WasmUint32ToNumber(offset), WasmUint32ToNumber(size));
}
// Builtin for string.new_wtf8_array: checks that [start, end) is a valid
// range of `array`, then tail-calls the runtime function that builds the
// string. Throws kWasmTrapArrayOutOfBounds if the range check fails.
builtin WasmStringNewWtf8Array(
start: uint32, end: uint32, array: WasmArray, policy: Smi): String {
const context = LoadContextFromFrame();
try {
// `end` must not lie past the array and the range must not be inverted;
// together the two checks give start <= end <= array.length.
if (array.length < end) goto OffsetOutOfRange;
if (end < start) goto OffsetOutOfRange;
// The runtime function takes its arguments in a different order than this
// builtin: policy and array first, then the two offsets as Smis.
tail runtime::WasmStringNewWtf8Array(
context, policy, array, SmiFromUint32(start), SmiFromUint32(end));
} label OffsetOutOfRange deferred {
const error = MessageTemplate::kWasmTrapArrayOutOfBounds;
runtime::ThrowWasmError(context, SmiConstant(error));
unreachable;
}
}
builtin WasmStringNewWtf16(
memory: uint32, offset: uint32, size: uint32): String {
const instance = LoadInstanceFromFrame();
......
......@@ -5761,6 +5761,14 @@ Node* WasmGraphBuilder::StringNewWtf8(uint32_t memory,
gasm_->SmiConstant(static_cast<int32_t>(policy)));
}
// Emits a call to the WasmStringNewWtf8Array builtin. The decoding policy is
// turned into a Smi constant and passed as the last argument, after the
// start offset, the end offset and the array.
Node* WasmGraphBuilder::StringNewWtf8Array(wasm::StringRefWtf8Policy policy,
                                           Node* array, Node* start,
                                           Node* end) {
  auto policy_smi = gasm_->SmiConstant(static_cast<int32_t>(policy));
  return gasm_->CallBuiltin(Builtin::kWasmStringNewWtf8Array,
                            Operator::kNoDeopt, start, end, array, policy_smi);
}
Node* WasmGraphBuilder::StringNewWtf16(uint32_t memory, Node* offset,
Node* size) {
return gasm_->CallBuiltin(Builtin::kWasmStringNewWtf16, Operator::kNoDeopt,
......
......@@ -537,6 +537,8 @@ class WasmGraphBuilder {
Node** no_match_control, Node** no_match_effect);
Node* StringNewWtf8(uint32_t memory, wasm::StringRefWtf8Policy policy,
Node* offset, Node* size);
Node* StringNewWtf8Array(wasm::StringRefWtf8Policy policy, Node* array,
Node* start, Node* end);
Node* StringNewWtf16(uint32_t memory, Node* offset, Node* size);
Node* StringConst(uint32_t index);
Node* StringMeasureUtf8(Node* string, CheckForNull null_check,
......
......@@ -857,6 +857,31 @@ RUNTIME_FUNCTION(Runtime_WasmCreateResumePromise) {
return *result;
}
namespace {

// Builds a String from `bytes`, choosing the factory routine by `policy`:
//   kWtf8PolicyReject  -> NewStringFromStrictUtf8
//   kWtf8PolicyAccept  -> NewStringFromWtf8
//   kWtf8PolicyReplace -> NewStringFromUtf8 (on the bytes viewed as chars)
// Returns the resulting string object. Failure of the factory call is handled
// by ASSIGN_RETURN_FAILURE_ON_EXCEPTION (presumably an early return of a
// failure value -- confirm against the macro definition).
Object StringFromWtf8(Isolate* isolate, wasm::StringRefWtf8Policy policy,
                      const base::Vector<const uint8_t> bytes) {
  // TODO(12868): Override any exception with an uncatchable-by-wasm trap.
  auto* factory = isolate->factory();
  Handle<String> decoded;
  switch (policy) {
    case wasm::kWtf8PolicyReject: {
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, decoded, factory->NewStringFromStrictUtf8(bytes));
      break;
    }
    case wasm::kWtf8PolicyAccept: {
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, decoded, factory->NewStringFromWtf8(bytes));
      break;
    }
    case wasm::kWtf8PolicyReplace: {
      // NewStringFromUtf8 is called with a char view of the same bytes.
      auto chars = base::Vector<const char>::cast(bytes);
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, decoded, factory->NewStringFromUtf8(chars));
      break;
    }
  }
  return *decoded;
}

}  // namespace
// Returns the new string if the operation succeeds. Otherwise throws an
// exception and returns an empty result.
RUNTIME_FUNCTION(Runtime_WasmStringNewWtf8) {
......@@ -882,25 +907,28 @@ RUNTIME_FUNCTION(Runtime_WasmStringNewWtf8) {
const base::Vector<const uint8_t> bytes{instance->memory_start() + offset,
size};
// TODO(12868): Override any exception with an uncatchable-by-wasm trap.
Handle<String> result;
switch (policy) {
case wasm::kWtf8PolicyReject:
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewStringFromStrictUtf8(bytes));
break;
case wasm::kWtf8PolicyAccept:
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewStringFromWtf8(bytes));
break;
case wasm::kWtf8PolicyReplace: {
auto string = base::Vector<const char>::cast(bytes);
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewStringFromUtf8(string));
break;
}
}
return *result;
return StringFromWtf8(isolate, policy, bytes);
}
// Runtime entry for string.new_wtf8_array. Takes four arguments: the policy
// (a positive Smi), the WasmArray, and the start and end offsets. Decodes the
// array bytes in [start, end) via StringFromWtf8 and returns its result.
// The range is only DCHECKed here; callers are expected to have validated it.
RUNTIME_FUNCTION(Runtime_WasmStringNewWtf8Array) {
  ClearThreadInWasmScope flag_scope(isolate);
  DCHECK_EQ(4, args.length());
  HandleScope scope(isolate);

  const uint32_t raw_policy = args.positive_smi_value_at(0);
  Handle<WasmArray> array = args.at<WasmArray>(1);
  const uint32_t start = NumberToUint32(args[2]);
  const uint32_t end = NumberToUint32(args[3]);

  // Debug-only sanity checks on the inputs.
  DCHECK(raw_policy <= wasm::kLastWtf8Policy);
  DCHECK_EQ(sizeof(uint8_t), array->type()->element_type().value_kind_size());
  DCHECK_LE(start, end);
  DCHECK_LE(end, array->length());

  const auto policy = static_cast<wasm::StringRefWtf8Policy>(raw_policy);

  // NOTE(review): `first_byte` is a raw pointer derived from `array`. If the
  // array's storage can move while StringFromWtf8 allocates the result, the
  // `bytes` view would dangle -- confirm GC safety of this pattern.
  const void* first_byte = ArrayElementAddress(array, start, sizeof(uint8_t));
  const base::Vector<const uint8_t> bytes{
      static_cast<const uint8_t*>(first_byte), end - start};
  return StringFromWtf8(isolate, policy, bytes);
}
RUNTIME_FUNCTION(Runtime_WasmStringNewWtf16) {
......
......@@ -613,6 +613,7 @@ namespace internal {
F(WasmSyncStackLimit, 0, 1) \
F(WasmCreateResumePromise, 2, 1) \
F(WasmStringNewWtf8, 5, 1) \
F(WasmStringNewWtf8Array, 4, 1) \
F(WasmStringNewWtf16, 4, 1) \
F(WasmStringConst, 2, 1) \
F(WasmStringMeasureUtf8, 1, 1) \
......
......@@ -6225,7 +6225,32 @@ class LiftoffCompiler {
const Wtf8PolicyImmediate<validate>& imm,
const Value& array, const Value& start,
const Value& end, Value* result) {
UNIMPLEMENTED();
LiftoffRegList pinned;
LiftoffRegister array_reg = pinned.set(
__ LoadToRegister(__ cache_state()->stack_state.end()[-3], pinned));
MaybeEmitNullCheck(decoder, array_reg.gp(), pinned, array.type);
LiftoffAssembler::VarState array_var(kRef, array_reg, 0);
LiftoffRegister policy_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(policy_reg, static_cast<int32_t>(imm.value));
LiftoffAssembler::VarState policy_var(kSmiKind, policy_reg, 0);
CallRuntimeStub(WasmCode::kWasmStringNewWtf8Array,
MakeSig::Returns(kRef).Params(kI32, kI32, kRef, kSmiKind),
{
__ cache_state()->stack_state.end()[-2], // start
__ cache_state()->stack_state.end()[-1], // end
array_var,
policy_var,
},
decoder->position());
__ cache_state()->stack_state.pop_back(3);
RegisterDebugSideTableEntry(decoder, DebugSideTableBuilder::kDidSpill);
LiftoffRegister result_reg(kReturnRegister0);
__ PushRegister(kRef, result_reg);
}
void StringNewWtf16(FullDecoder* decoder,
......
......@@ -1408,7 +1408,8 @@ class WasmGraphBuildingInterface {
const Wtf8PolicyImmediate<validate>& imm,
const Value& array, const Value& start,
const Value& end, Value* result) {
UNIMPLEMENTED();
result->node = builder_->StringNewWtf8Array(imm.value, array.node,
start.node, end.node);
}
void StringNewWtf16(FullDecoder* decoder,
......
......@@ -135,7 +135,8 @@ struct WasmModule;
V(WasmStringIsUSVSequence) \
V(WasmStringViewWtf16GetCodeUnit) \
V(WasmStringViewWtf16Encode) \
V(WasmStringViewWtf16Slice)
V(WasmStringViewWtf16Slice) \
V(WasmStringNewWtf8Array)
// Sorted, disjoint and non-overlapping memory regions. A region is of the
// form [start, end). So there's no [start, end), [end, other_end),
......
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --experimental-wasm-stringref --experimental-wasm-gc
d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
// Function signatures returning a stringref: kSig_w_v takes no parameters,
// kSig_w_ii takes two i32 parameters.
let kSig_w_v = makeSig([], [kWasmStringRef]);
let kSig_w_ii = makeSig([kWasmI32, kWasmI32], [kWasmStringRef]);
// Encodes a JS string as an array of WTF-8 byte values. Each code point is
// emitted as 1-4 bytes depending on its magnitude; a lone surrogate is a code
// point in 0x800..0xffff and therefore gets the three-byte form.
// Throws an Error for a code point above 0x10ffff.
function encodeWtf8(str) {
  const bytes = [];
  // The string iterator yields whole code points: a well-formed surrogate
  // pair arrives as a single element.
  for (const ch of str) {
    const cp = ch.codePointAt(0);
    // Continuation byte carrying the six bits of `cp` starting at `shift`.
    const continuation = shift => 0x80 | ((cp >> shift) & 0x3f);
    if (cp <= 0x7f) {
      bytes.push(cp);
    } else if (cp <= 0x7ff) {
      bytes.push(0xc0 | (cp >> 6), continuation(0));
    } else if (cp <= 0xffff) {
      bytes.push(0xe0 | (cp >> 12), continuation(6), continuation(0));
    } else if (cp <= 0x10ffff) {
      bytes.push(0xf0 | (cp >> 18), continuation(12), continuation(6),
                 continuation(0));
    } else {
      throw new Error("bad codepoint " + cp);
    }
  }
  return bytes;
}
// Sample strings fed through encodeWtf8 to build the "valid" test data:
// empty, ASCII, non-ASCII BMP characters, a surrogate pair, and lone
// surrogates at various positions.
let interestingStrings = ['',
'ascii',
'latin \xa9 1',
'two \ucccc byte',
'surrogate \ud800\udc000 pair',
'isolated \ud800 leading',
'isolated \udc00 trailing',
'\ud800 isolated leading at beginning',
'\udc00 isolated trailing at beginning',
'isolated leading at end \ud800',
'isolated trailing at end \udc00',
'swapped surrogate \udc00\ud800 pair'];
// Returns true when `codepoint` lies in the inclusive range 0xD800..0xDFFF.
function IsSurrogate(codepoint) {
  const atOrAboveFirst = 0xD800 <= codepoint;
  return atOrAboveFirst && codepoint <= 0xDFFF;
}
// Returns true if iterating `str` by code point yields at least one element
// whose code point is a surrogate. Well-formed pairs are coalesced by the
// string iterator, so only unpaired surrogates are seen.
function HasIsolatedSurrogate(str) {
  return Array.from(str).some(ch => IsSurrogate(ch.codePointAt(0)));
}
// Returns a copy of `str` in which every element yielded by the string
// iterator that is a surrogate code point is replaced by `replacement`
// (U+FFFD by default); all other elements are kept unchanged.
function ReplaceIsolatedSurrogates(str, replacement='\ufffd') {
  return Array.from(str).reduce(
      (acc, ch) => acc + (IsSurrogate(ch.codePointAt(0)) ? replacement : ch),
      '');
}
// Builds one byte buffer holding all test inputs back to back and returns
// { valid, invalid, data }:
//   valid   - maps each entry of interestingStrings to the {offset, length}
//             of its encodeWtf8 bytes within `data`;
//   invalid - maps each hand-written byte string below to the {offset,
//             length} of its raw bytes (one byte per char code);
//   data    - the concatenated bytes as a Uint8Array.
function makeWtf8TestDataSegment() {
  const data = [];
  const valid = {};
  const invalid = {};
  for (const str of interestingStrings) {
    const encoded = encodeWtf8(str);
    valid[str] = { offset: data.length, length: encoded.length };
    encoded.forEach(byte => data.push(byte));
  }
  const malformed = ['trailing high byte \xa9',
                     'interstitial high \xa9 byte',
                     'invalid \xc0 byte',
                     'invalid three-byte \xed\xd0\x80',
                     'surrogate \xed\xa0\x80\xed\xb0\x80 pair'];
  for (const raw of malformed) {
    invalid[raw] = { offset: data.length, length: raw.length };
    // Each char code is pushed as-is, so the string is treated as raw bytes.
    for (let pos = 0; pos < raw.length; pos++) {
      data.push(raw.charCodeAt(pos));
    }
  }
  return { valid, invalid, data: Uint8Array.from(data) };
};
// Exercises string.new_wtf8_array with each of the three WTF-8 policies on
// valid and invalid byte sequences, and checks the start/end bounds handling.
(function TestStringNewWtf8Array() {
let builder = new WasmModuleBuilder();
let data = makeWtf8TestDataSegment();
// Two passive data segments: the full test data, and just "ascii" for the
// bounds-check function.
let data_index = builder.addPassiveDataSegment(data.data);
let ascii_data_index =
builder.addPassiveDataSegment(Uint8Array.from(encodeWtf8("ascii")));
let i8_array = builder.addArray(kWasmI8, true);
// Helper wasm function: creates an i8 array from the whole test data segment
// (offset 0, length data.data.length).
let make_i8_array = builder.addFunction(
"make_i8_array", makeSig([], [wasmRefType(i8_array)]))
.addBody([
...wasmI32Const(0),
...wasmI32Const(data.data.length),
kGCPrefix, kExprArrayNewDataStatic, i8_array, data_index
]).index;
// One exported (start, end) -> stringref function per policy; each builds a
// fresh array via make_i8_array and decodes the requested range.
for (let [policy, name] of [[kWtf8PolicyAccept, "new_wtf8"],
[kWtf8PolicyReject, "new_utf8"],
[kWtf8PolicyReplace, "new_utf8_sloppy"]]) {
builder.addFunction(name, kSig_w_ii)
.exportFunc()
.addBody([
kExprCallFunction, make_i8_array,
kExprLocalGet, 0, kExprLocalGet, 1,
kGCPrefix, kExprStringNewWtf8Array, policy
]);
}
// bounds_check decodes a range of an array holding only the bytes of "ascii",
// so out-of-range arguments are easy to construct.
builder.addFunction("bounds_check", kSig_w_ii)
.exportFunc()
.addBody([
...wasmI32Const(0),
...wasmI32Const("ascii".length),
kGCPrefix, kExprArrayNewDataStatic, i8_array, ascii_data_index,
kExprLocalGet, 0, kExprLocalGet, 1,
kGCPrefix, kExprStringNewWtf8Array, kWtf8PolicyAccept
]);
let instance = builder.instantiate();
// Valid WTF-8: the accept policy always round-trips; the reject and replace
// policies differ only for strings that contain isolated surrogates.
for (let [str, {offset, length}] of Object.entries(data.valid)) {
let start = offset;
let end = offset + length;
assertEquals(str, instance.exports.new_wtf8(start, end));
if (HasIsolatedSurrogate(str)) {
assertThrows(() => instance.exports.new_utf8(start, end),
WebAssembly.RuntimeError, "invalid UTF-8 string");
// Isolated surrogates have the three-byte pattern ED [A0,BF]
// [80,BF]. When the sloppy decoder gets to the second byte, it
// will reject the sequence, and then retry parsing at the second
// byte. Seeing the second byte can't start a sequence, it
// replaces the second byte and continues with the next, which
// also can't start a sequence. The result is that one isolated
// surrogate is replaced by three U+FFFD codepoints.
assertEquals(ReplaceIsolatedSurrogates(str, '\ufffd\ufffd\ufffd'),
instance.exports.new_utf8_sloppy(start, end));
} else {
assertEquals(str, instance.exports.new_utf8(start, end));
assertEquals(str, instance.exports.new_utf8_sloppy(start, end));
}
}
// Invalid byte sequences must trap under both the accept and reject
// policies. The replace policy (new_utf8_sloppy) is not exercised here.
for (let [str, {offset, length}] of Object.entries(data.invalid)) {
let start = offset;
let end = offset + length;
assertThrows(() => instance.exports.new_wtf8(start, end),
WebAssembly.RuntimeError, "invalid WTF-8 string");
assertThrows(() => instance.exports.new_utf8(start, end),
WebAssembly.RuntimeError, "invalid UTF-8 string");
}
// Bounds handling: the full range, an empty range at the very end, and the
// last element are accepted; an `end` past the array length traps.
assertEquals("ascii", instance.exports.bounds_check(0, "ascii".length));
assertEquals("", instance.exports.bounds_check("ascii".length,
"ascii".length));
assertEquals("i", instance.exports.bounds_check("ascii".length - 1,
"ascii".length));
assertThrows(() => instance.exports.bounds_check(0, 100),
WebAssembly.RuntimeError, "array element access out of bounds");
assertThrows(() => instance.exports.bounds_check("ascii".length,
"ascii".length + 1),
WebAssembly.RuntimeError, "array element access out of bounds");
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment