Commit 8a30ee7e authored by Andy Wingo, committed by V8 LUCI CQ

[stringrefs] Implement string.as_wtf8, stringview_wtf8.advance

We represent WTF-8 views as ByteArray instances.

Bug: v8:12868
Change-Id: I642ee2ef3d6fd7ea0f383073b282549c31233c7b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3750931
Commit-Queue: Andy Wingo <wingo@igalia.com>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81607}
parent c96aa211
......@@ -54,6 +54,7 @@ extern runtime WasmStringEncodeWtf8Array(
Context, Smi, String, WasmArray, Number): Number;
extern runtime WasmStringEncodeWtf16(
Context, WasmInstanceObject, Smi, String, Number, Smi, Smi): JSAny;
// Runtime function taking a context and a String and returning a ByteArray;
// the WasmStringAsWtf8 builtin tail-calls it.
extern runtime WasmStringAsWtf8(Context, String): ByteArray;
}
namespace unsafe {
......@@ -932,6 +933,46 @@ builtin WasmStringIsUSVSequence(str: String): int32 {
if (Signed(ChangeNumberToUint32(length)) < 0) return 0;
return 1;
}
// Produces the WTF-8 view of `str` as a ByteArray. All work is delegated to
// the WasmStringAsWtf8 runtime function, which is tail-called with the
// context loaded from the current frame.
builtin WasmStringAsWtf8(str: String): ByteArray {
tail runtime::WasmStringAsWtf8(LoadContextFromFrame(), str);
}
// Returns true if the byte at offset `pos` of `view` begins a codepoint.
// No bounds check is performed here; callers pass offsets below the length.
macro IsWtf8CodepointStart(view: ByteArray, pos: uint32): bool {
  const byteIndex = Convert<uintptr>(pos);
  // Continuation bytes of a multibyte sequence look like 0b10xxxxxx; every
  // other byte value starts a codepoint.
  return (view.bytes[byteIndex] & 0xc0) != 0x80;
}
// Normalizes `pos` to a codepoint boundary of `view`: offsets at or past the
// end clamp to the length, and an offset inside a multibyte sequence is moved
// forward to the start of the following codepoint (or to the length).
builtin WasmWtf8PositionTreatment(view: ByteArray, pos: uint32): uint32 {
  const viewLength = Unsigned(SmiToInt32(view.length));
  if (pos >= viewLength) return viewLength;
  if (IsWtf8CodepointStart(view, pos)) return pos;

  // `pos` points at a continuation byte, so the next codepoint begins at
  // most three bytes further on. Probe pos + 1 and pos + 2 in turn, stopping
  // early if the view ends first.
  const oneAhead = pos + 1;
  if (oneAhead == viewLength) return viewLength;
  if (IsWtf8CodepointStart(view, oneAhead)) return oneAhead;

  const twoAhead = pos + 2;
  if (twoAhead == viewLength) return viewLength;
  if (IsWtf8CodepointStart(view, twoAhead)) return twoAhead;

  // Neither probe matched; the only remaining candidate is pos + 3.
  return pos + 3;
}
// Advances from `pos` by up to `bytes` bytes within `view` and returns the
// resulting offset, which is always either the view length or the start of a
// codepoint.
builtin WasmStringViewWtf8Advance(
    view: ByteArray, pos: uint32, bytes: uint32): uint32 {
  const viewLength = Unsigned(SmiToInt32(view.length));
  // First snap the starting position onto a codepoint boundary.
  const startPos = WasmWtf8PositionTreatment(view, pos);
  // Requests that reach or pass the end of the view saturate at the length.
  const remaining = viewLength - startPos;
  if (bytes >= remaining) return viewLength;

  const target = startPos + bytes;
  // Walk back from `target` to the nearest codepoint start at or before it;
  // it can be at most three bytes behind.
  if (IsWtf8CodepointStart(view, target)) return target;
  const oneBack = target - 1;
  if (IsWtf8CodepointStart(view, oneBack)) return oneBack;
  const twoBack = target - 2;
  if (IsWtf8CodepointStart(view, twoBack)) return twoBack;
  return target - 3;
}
transitioning builtin WasmStringViewWtf16GetCodeUnit(
string: String, offset: uint32): uint32 {
try {
......
......@@ -5902,6 +5902,24 @@ Node* WasmGraphBuilder::StringIsUSVSequence(Node* str, CheckForNull null_check,
Operator::kNoDeopt, str);
}
// Emits a call to the WasmStringAsWtf8 builtin for `str`. When `null_check`
// is kWithNullCheck the operand is first routed through AssertNotNull at
// `position`.
Node* WasmGraphBuilder::StringAsWtf8(Node* str, CheckForNull null_check,
                                     wasm::WasmCodePosition position) {
  Node* checked_str = str;
  if (null_check == kWithNullCheck) {
    checked_str = AssertNotNull(str, position);
  }
  return gasm_->CallBuiltin(Builtin::kWasmStringAsWtf8, Operator::kNoDeopt,
                            checked_str);
}
// Emits a call to the WasmStringViewWtf8Advance builtin with `view`, `pos`
// and `bytes`. When `null_check` is kWithNullCheck the view is first routed
// through AssertNotNull at `position`.
Node* WasmGraphBuilder::StringViewWtf8Advance(Node* view,
                                              CheckForNull null_check,
                                              Node* pos, Node* bytes,
                                              wasm::WasmCodePosition position) {
  Node* checked_view = view;
  if (null_check == kWithNullCheck) {
    checked_view = AssertNotNull(view, position);
  }
  return gasm_->CallBuiltin(Builtin::kWasmStringViewWtf8Advance,
                            Operator::kNoDeopt, checked_view, pos, bytes);
}
Node* WasmGraphBuilder::StringViewWtf16GetCodeUnit(
Node* string, CheckForNull null_check, Node* offset,
wasm::WasmCodePosition position) {
......
......@@ -568,6 +568,10 @@ class WasmGraphBuilder {
CheckForNull b_null_check, wasm::WasmCodePosition position);
Node* StringIsUSVSequence(Node* str, CheckForNull null_check,
wasm::WasmCodePosition position);
// Builds a call to the WasmStringAsWtf8 builtin for `str`, asserting that it
// is non-null first when `null_check` is kWithNullCheck.
Node* StringAsWtf8(Node* str, CheckForNull null_check,
wasm::WasmCodePosition position);
// Builds a call to the WasmStringViewWtf8Advance builtin with `view`, `pos`
// and `bytes`, asserting that `view` is non-null first when `null_check` is
// kWithNullCheck.
Node* StringViewWtf8Advance(Node* view, CheckForNull null_check, Node* pos,
Node* bytes, wasm::WasmCodePosition position);
Node* StringViewWtf16GetCodeUnit(Node* string, CheckForNull null_check,
Node* offset,
wasm::WasmCodePosition position);
......
......@@ -1004,6 +1004,14 @@ int MeasureWtf8(base::Vector<const T> wtf16) {
}
return length;
}
// Returns the result of MeasureWtf8 over the flat contents of `string`,
// dispatching to the one-byte or two-byte overload as appropriate. The handle
// is taken by value, so the flattened string is not visible to the caller.
int MeasureWtf8(Isolate* isolate, Handle<String> string) {
  string = String::Flatten(isolate, string);
  // The flat content is only valid while GC is disallowed.
  DisallowGarbageCollection no_gc;
  String::FlatContent content = string->GetFlatContent(no_gc);
  DCHECK(content.IsFlat());
  if (content.IsOneByte()) {
    return MeasureWtf8(content.ToOneByteVector());
  }
  return MeasureWtf8(content.ToUC16Vector());
}
size_t MaxEncodedSize(base::Vector<const uint8_t> wtf16) {
DCHECK(wtf16.size() < std::numeric_limits<size_t>::max() /
unibrow::Utf8::kMax8BitCodeUnitSize);
......@@ -1116,18 +1124,7 @@ RUNTIME_FUNCTION(Runtime_WasmStringMeasureWtf8) {
HandleScope scope(isolate);
Handle<String> string(String::cast(args[0]), isolate);
string = String::Flatten(isolate, string);
int length;
{
DisallowGarbageCollection no_gc;
String::FlatContent content = string->GetFlatContent(no_gc);
DCHECK(content.IsFlat());
if (content.IsOneByte()) {
length = MeasureWtf8(content.ToOneByteVector());
} else {
length = MeasureWtf8(content.ToUC16Vector());
}
}
int length = MeasureWtf8(isolate, string);
return *isolate->factory()->NewNumberFromInt(length);
}
......@@ -1216,5 +1213,24 @@ RUNTIME_FUNCTION(Runtime_WasmStringEncodeWtf16) {
return Smi::zero(); // Unused.
}
// Runtime entry for string.as_wtf8: measures the string, allocates a
// ByteArray of exactly that many bytes, encodes the string into it and
// returns the array. Takes one argument, the String.
RUNTIME_FUNCTION(Runtime_WasmStringAsWtf8) {
  ClearThreadInWasmScope flag_scope(isolate);
  DCHECK_EQ(1, args.length());
  HandleScope scope(isolate);
  Handle<String> string(String::cast(args[0]), isolate);

  // Size the backing store from the measured length.
  const int byte_count = MeasureWtf8(isolate, string);
  Handle<ByteArray> view = isolate->factory()->NewByteArray(byte_count);

  // EncodeWtf8 obtains its destination through this callback, which exposes
  // the ByteArray payload as a writable char vector.
  auto get_writable_bytes =
      [&](const DisallowGarbageCollection&) -> base::Vector<char> {
    char* payload = reinterpret_cast<char*>(view->GetDataStartAddress());
    return {payload, static_cast<size_t>(byte_count)};
  };

  const wasm::StringRefWtf8Policy policy = wasm::kWtf8PolicyAccept;
  // NOTE(review): the destination is sized from MeasureWtf8, so the
  // out-of-bounds trap is presumably never raised here -- confirm against
  // EncodeWtf8.
  EncodeWtf8(isolate, policy, string, get_writable_bytes, 0,
             MessageTemplate::kWasmTrapArrayOutOfBounds);
  return *view;
}
} // namespace internal
} // namespace v8
......@@ -618,7 +618,8 @@ namespace internal {
F(WasmStringMeasureWtf8, 1, 1) \
F(WasmStringEncodeWtf8, 5, 1) \
F(WasmStringEncodeWtf16, 6, 1) \
F(WasmStringEncodeWtf8Array, 4, 1)
F(WasmStringEncodeWtf8Array, 4, 1) \
F(WasmStringAsWtf8, 1, 1)
#define FOR_EACH_INTRINSIC_WASM_TEST(F, I) \
F(DeserializeWasmModule, 2, 1) \
......
......@@ -6609,13 +6609,52 @@ class LiftoffCompiler {
}
// Liftoff code generation for string.as_wtf8: pops the string operand,
// optionally null-checks it, calls the WasmStringAsWtf8 runtime stub and
// pushes the returned reference.
void StringAsWtf8(FullDecoder* decoder, const Value& str, Value* result) {
// NOTE(review): this UNIMPLEMENTED() looks like the removed line of the diff
// hunk shown flattened here (no +/- markers survive) -- confirm it is absent
// from the committed file.
UNIMPLEMENTED();
LiftoffRegList pinned;
// Pop the string into a register and pin it so it stays live across the null
// check.
LiftoffRegister str_reg = pinned.set(__ PopToRegister(pinned));
MaybeEmitNullCheck(decoder, str_reg.gp(), pinned, str.type);
LiftoffAssembler::VarState str_var(kRef, str_reg, 0);
// Stub signature: (ref) -> ref.
CallRuntimeStub(WasmCode::kWasmStringAsWtf8,
MakeSig::Returns(kRef).Params(kRef),
{
str_var,
},
decoder->position());
RegisterDebugSideTableEntry(decoder, DebugSideTableBuilder::kDidSpill);
// The stub's result is taken from kReturnRegister0 and pushed as a kRef.
LiftoffRegister result_reg(kReturnRegister0);
__ PushRegister(kRef, result_reg);
}
// Liftoff code generation for stringview_wtf8.advance: reads the three
// operands (view, pos, bytes) from the top of the value stack, optionally
// null-checks the view, calls the WasmStringViewWtf8Advance runtime stub and
// pushes the i32 result.
void StringViewWtf8Advance(FullDecoder* decoder, const Value& view,
const Value& pos, const Value& bytes,
Value* result) {
// NOTE(review): this UNIMPLEMENTED() looks like the removed line of the diff
// hunk shown flattened here (no +/- markers survive) -- confirm it is absent
// from the committed file.
UNIMPLEMENTED();
LiftoffRegList pinned;
// `bytes` and `pos` are referenced in place as the top two stack slots.
LiftoffAssembler::VarState& bytes_var =
__ cache_state()->stack_state.end()[-1];
LiftoffAssembler::VarState& pos_var =
__ cache_state()->stack_state.end()[-2];
// The view (third slot from the top) is loaded into a pinned register so it
// can be null-checked.
LiftoffRegister view_reg = pinned.set(
__ LoadToRegister(__ cache_state()->stack_state.end()[-3], pinned));
MaybeEmitNullCheck(decoder, view_reg.gp(), pinned, view.type);
LiftoffAssembler::VarState view_var(kRef, view_reg, 0);
// Stub signature: (ref, i32, i32) -> i32.
CallRuntimeStub(WasmCode::kWasmStringViewWtf8Advance,
MakeSig::Returns(kI32).Params(kRef, kI32, kI32),
{
view_var,
pos_var,
bytes_var,
},
decoder->position());
// The operands were not popped above, so drop all three after the call.
__ DropValues(3);
RegisterDebugSideTableEntry(decoder, DebugSideTableBuilder::kDidSpill);
// The stub's result is taken from kReturnRegister0 and pushed as an i32.
LiftoffRegister result_reg(kReturnRegister0);
__ PushRegister(kI32, result_reg);
}
void StringViewWtf8Encode(FullDecoder* decoder,
......
......@@ -1491,13 +1491,17 @@ class WasmGraphBuildingInterface {
}
// Graph-building hook for string.as_wtf8: forwards to
// WasmGraphBuilder::StringAsWtf8 and stores the resulting node in `result`
// via SetAndTypeNode.
void StringAsWtf8(FullDecoder* decoder, const Value& str, Value* result) {
// NOTE(review): this UNIMPLEMENTED() looks like the removed line of the diff
// hunk shown flattened here (no +/- markers survive) -- confirm it is absent
// from the committed file.
UNIMPLEMENTED();
// The null-check mode passed to the builder is derived from the operand's
// type.
SetAndTypeNode(result,
builder_->StringAsWtf8(str.node, NullCheckFor(str.type),
decoder->position()));
}
// Graph-building hook for stringview_wtf8.advance: forwards to
// WasmGraphBuilder::StringViewWtf8Advance and assigns the resulting node
// directly to `result->node`.
void StringViewWtf8Advance(FullDecoder* decoder, const Value& view,
const Value& pos, const Value& bytes,
Value* result) {
// NOTE(review): this UNIMPLEMENTED() looks like the removed line of the diff
// hunk shown flattened here (no +/- markers survive) -- confirm it is absent
// from the committed file.
UNIMPLEMENTED();
// The null-check mode passed to the builder is derived from the view's type.
result->node = builder_->StringViewWtf8Advance(
view.node, NullCheckFor(view.type), pos.node, bytes.node,
decoder->position());
}
void StringViewWtf8Encode(FullDecoder* decoder,
......
......@@ -139,7 +139,9 @@ struct WasmModule;
V(WasmStringNewWtf8Array) \
V(WasmStringNewWtf16Array) \
V(WasmStringEncodeWtf8Array) \
V(WasmStringEncodeWtf16Array)
V(WasmStringEncodeWtf16Array) \
V(WasmStringAsWtf8) \
V(WasmStringViewWtf8Advance)
// Sorted, disjoint and non-overlapping memory regions. A region is of the
// form [start, end). So there's no [start, end), [end, other_end),
......
......@@ -10,6 +10,7 @@ let kSig_w_ii = makeSig([kWasmI32, kWasmI32], [kWasmStringRef]);
let kSig_w_v = makeSig([], [kWasmStringRef]);
let kSig_i_w = makeSig([kWasmStringRef], [kWasmI32]);
let kSig_i_wi = makeSig([kWasmStringRef, kWasmI32], [kWasmI32]);
// Signature (stringref, i32, i32) -> i32.
let kSig_i_wii = makeSig([kWasmStringRef, kWasmI32, kWasmI32], [kWasmI32]);
let kSig_i_ww = makeSig([kWasmStringRef, kWasmStringRef], [kWasmI32]);
let kSig_i_wiii = makeSig([kWasmStringRef, kWasmI32, kWasmI32, kWasmI32],
[kWasmI32]);
......@@ -778,3 +779,72 @@ function makeWtf16TestDataSegment() {
assertThrows(() => instance.exports.slice_null(),
WebAssembly.RuntimeError, "dereferencing a null pointer");
})();
// Exercises string.as_wtf8 followed by stringview_wtf8.advance, comparing the
// wasm results against a JavaScript reference model over the WTF-8 encoding
// of each test string, and checks that a null view traps.
(function TestStringViewWtf8() {
  const builder = new WasmModuleBuilder();

  // advance(str, pos, bytes): convert `str` to a WTF-8 view, then advance.
  builder.addFunction("advance", kSig_i_wii)
    .exportFunc()
    .addBody([
      kExprLocalGet, 0, kGCPrefix, kExprStringAsWtf8,
      kExprLocalGet, 1,
      kExprLocalGet, 2,
      kGCPrefix, kExprStringViewWtf8Advance
    ]);

  // advance_null(): advance on a null view; expected to trap.
  builder.addFunction("advance_null", kSig_i_v)
    .exportFunc()
    .addBody([
      kExprRefNull, kStringViewWtf8Code,
      kExprI32Const, 0,
      kExprI32Const, 0,
      kGCPrefix, kExprStringViewWtf8Advance
    ]);

  // Reference model: a byte starts a codepoint unless it is 0b10xxxxxx.
  function Wtf8StartsCodepoint(wtf8, offset) {
    const topBits = wtf8[offset] & 0xc0;
    return topBits !== 0x80;
  }

  // Reference model: first codepoint start at or after `offset`, or the
  // length if there is none.
  function Wtf8PositionTreatment(wtf8, offset) {
    for (let i = offset; i < wtf8.length; i++) {
      if (Wtf8StartsCodepoint(wtf8, i)) return i;
    }
    return wtf8.length;
  }

  // Reference model: nearest codepoint start at or before `offset`, with
  // offsets at or past the end clamped to the length.
  function CodepointStart(wtf8, offset) {
    if (offset >= wtf8.length) return wtf8.length;
    let start = offset;
    while (!Wtf8StartsCodepoint(wtf8, start)) start--;
    return start;
  }

  const instance = builder.instantiate();
  const advance = (str, pos, bytes) => instance.exports.advance(str, pos, bytes);

  // In pure ASCII every byte is a codepoint, so advancing by one is pos + 1.
  const ascii = "ascii";
  for (let pos = 0; pos < ascii.length; pos++) {
    assertEquals(pos + 1, advance("ascii", pos, 1));
  }

  for (const str of interestingStrings) {
    const wtf8 = encodeWtf8(str);
    const end = wtf8.length;

    // Positions or byte counts that reach or exceed the end saturate at the
    // view length; -1 is included as an extreme argument value.
    const saturatingCases = [
      [0, -1],
      [-1, 0],
      [0, end],
      [end, 0],
      [0, end + 1],
      [end + 1, 0],
    ];
    for (const [pos, bytes] of saturatingCases) {
      assertEquals(end, advance(str, pos, bytes));
    }

    // Exhaustively compare in-range (pos, bytes) pairs with the model.
    for (let pos = 0; pos < end; pos++) {
      for (let bytes = 0; bytes < end - pos; bytes++) {
        const expected =
            CodepointStart(wtf8, Wtf8PositionTreatment(wtf8, pos) + bytes);
        assertEquals(expected, advance(str, pos, bytes));
      }
    }
  }

  assertThrows(() => instance.exports.advance_null(),
               WebAssembly.RuntimeError, "dereferencing a null pointer");
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment