Commit a541dbcb authored by Andy Wingo, committed by V8 LUCI CQ

[stringrefs] Implement string.encode_wtf8

Bug: v8:12868
Change-Id: I81f175057ec0d40dbd1e9fd329a0e37ef3ade814
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3702332
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Andy Wingo <wingo@igalia.com>
Cr-Commit-Position: refs/heads/main@{#81118}
parent ae55e4d2
...@@ -45,6 +45,8 @@ extern runtime WasmStringNewWtf16( ...@@ -45,6 +45,8 @@ extern runtime WasmStringNewWtf16(
extern runtime WasmStringConst(Context, WasmInstanceObject, Smi): String; extern runtime WasmStringConst(Context, WasmInstanceObject, Smi): String;
extern runtime WasmStringMeasureUtf8(Context, String): Number; extern runtime WasmStringMeasureUtf8(Context, String): Number;
extern runtime WasmStringMeasureWtf8(Context, String): Number; extern runtime WasmStringMeasureWtf8(Context, String): Number;
// Runtime entry point for string.encode_wtf8. After the context, the
// arguments are: the instance, the memory index (Smi), the WTF-8 policy (Smi),
// the string to encode, and the destination offset as a Number. The returned
// value is a placeholder; the call either writes into memory or throws.
extern runtime WasmStringEncodeWtf8(
Context, WasmInstanceObject, Smi, Smi, String, Number): JSAny;
} }
namespace unsafe { namespace unsafe {
...@@ -811,6 +813,13 @@ builtin WasmStringMeasureWtf8(string: String): int32 { ...@@ -811,6 +813,13 @@ builtin WasmStringMeasureWtf8(string: String): int32 {
const result = runtime::WasmStringMeasureWtf8(LoadContextFromFrame(), string); const result = runtime::WasmStringMeasureWtf8(LoadContextFromFrame(), string);
return Signed(ChangeNumberToUint32(result)); return Signed(ChangeNumberToUint32(result));
} }
// Builtin called from compiled Wasm code for string.encode_wtf8. It only
// gathers the instance and context and then forwards everything to the runtime
// function of the same name, which does the actual encoding.
builtin WasmStringEncodeWtf8(
string: String, offset: uint32, memory: Smi, policy: Smi): JSAny {
const instance = LoadInstanceFromFrame();
// Note the argument reordering: the runtime function takes
// (context, instance, memory, policy, string, offset), and the uint32 offset
// is converted to a Number for the call.
tail runtime::WasmStringEncodeWtf8(
LoadContextFromInstance(instance), instance, memory, policy, string,
WasmUint32ToNumber(offset));
}
transitioning builtin WasmStringViewWtf16GetCodeUnit( transitioning builtin WasmStringViewWtf16GetCodeUnit(
string: String, offset: uint32): uint32 { string: String, offset: uint32): uint32 {
try { try {
......
...@@ -646,6 +646,8 @@ namespace internal { ...@@ -646,6 +646,8 @@ namespace internal {
T(WasmTrapArrayTooLarge, "requested new array is too large") \ T(WasmTrapArrayTooLarge, "requested new array is too large") \
T(WasmTrapStringInvalidWtf8, "invalid WTF-8 string") \ T(WasmTrapStringInvalidWtf8, "invalid WTF-8 string") \
T(WasmTrapStringOffsetOutOfBounds, "string offset out of bounds") \ T(WasmTrapStringOffsetOutOfBounds, "string offset out of bounds") \
T(WasmTrapStringIsolatedSurrogate, \
"Failed to encode string as UTF-8: contains unpaired surrogate") \
T(WasmExceptionError, "wasm exception") \ T(WasmExceptionError, "wasm exception") \
/* Asm.js validation related */ \ /* Asm.js validation related */ \
T(AsmJsInvalid, "Invalid asm.js: %") \ T(AsmJsInvalid, "Invalid asm.js: %") \
......
...@@ -5786,6 +5786,19 @@ Node* WasmGraphBuilder::StringMeasureWtf16(Node* string, ...@@ -5786,6 +5786,19 @@ Node* WasmGraphBuilder::StringMeasureWtf16(Node* string,
wasm::ObjectAccess::ToTagged(String::kLengthOffset)); wasm::ObjectAccess::ToTagged(String::kLengthOffset));
} }
// Builds the graph for string.encode_wtf8: an optional null check on |string|
// followed by a call to the WasmStringEncodeWtf8 builtin. The |memory| index
// and the |policy| are passed to the builtin as Smi constants; |offset| is
// forwarded unchanged. Returns the node of the builtin call.
Node* WasmGraphBuilder::StringEncodeWtf8(uint32_t memory,
                                         wasm::StringRefWtf8Policy policy,
                                         Node* string, CheckForNull null_check,
                                         Node* offset,
                                         wasm::WasmCodePosition position) {
  // Only guard the string when the caller asked for a null check.
  Node* checked_string = (null_check == kWithNullCheck)
                             ? AssertNotNull(string, position)
                             : string;
  Node* memory_index = gasm_->SmiConstant(memory);
  Node* policy_smi = gasm_->SmiConstant(policy);
  return gasm_->CallBuiltin(Builtin::kWasmStringEncodeWtf8, Operator::kNoDeopt,
                            checked_string, offset, memory_index, policy_smi);
}
Node* WasmGraphBuilder::StringViewWtf16GetCodeUnit( Node* WasmGraphBuilder::StringViewWtf16GetCodeUnit(
Node* string, CheckForNull null_check, Node* offset, Node* string, CheckForNull null_check, Node* offset,
wasm::WasmCodePosition position) { wasm::WasmCodePosition position) {
......
...@@ -547,6 +547,9 @@ class WasmGraphBuilder { ...@@ -547,6 +547,9 @@ class WasmGraphBuilder {
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
Node* StringMeasureWtf16(Node* string, CheckForNull null_check, Node* StringMeasureWtf16(Node* string, CheckForNull null_check,
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
// Builds the builtin call that implements string.encode_wtf8. |memory| and
// |policy| are compile-time values that get embedded as constants; |string|
// is null-checked first when |null_check| requests it.
Node* StringEncodeWtf8(uint32_t memory, wasm::StringRefWtf8Policy policy,
Node* string, CheckForNull null_check, Node* offset,
wasm::WasmCodePosition position);
Node* StringViewWtf16GetCodeUnit(Node* string, CheckForNull null_check, Node* StringViewWtf16GetCodeUnit(Node* string, CheckForNull null_check,
Node* offset, Node* offset,
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
......
...@@ -918,6 +918,7 @@ RUNTIME_FUNCTION(Runtime_WasmStringConst) { ...@@ -918,6 +918,7 @@ RUNTIME_FUNCTION(Runtime_WasmStringConst) {
} }
namespace { namespace {
// TODO(12868): Consider unifying with api.cc:String::Utf8Length.
template <typename T> template <typename T>
int MeasureWtf8(base::Vector<const T> wtf16) { int MeasureWtf8(base::Vector<const T> wtf16) {
int previous = unibrow::Utf16::kNoPreviousCharacter; int previous = unibrow::Utf16::kNoPreviousCharacter;
...@@ -932,6 +933,56 @@ int MeasureWtf8(base::Vector<const T> wtf16) { ...@@ -932,6 +933,56 @@ int MeasureWtf8(base::Vector<const T> wtf16) {
} }
return length; return length;
} }
// Upper bound, in bytes, on the encoded size of a one-byte string: every code
// unit is assumed to need the per-unit maximum for 8-bit code units.
size_t MaxEncodedSize(base::Vector<const uint8_t> wtf16) {
  const size_t bytes_per_unit = unibrow::Utf8::kMax8BitCodeUnitSize;
  const size_t unit_count = wtf16.size();
  // The multiplication below must not overflow size_t.
  DCHECK(unit_count < std::numeric_limits<size_t>::max() / bytes_per_unit);
  return unit_count * bytes_per_unit;
}
// Upper bound, in bytes, on the encoded size of a two-byte string: every code
// unit is assumed to need the per-unit maximum for 16-bit code units.
size_t MaxEncodedSize(base::Vector<const base::uc16> wtf16) {
  const size_t bytes_per_unit = unibrow::Utf8::kMax16BitCodeUnitSize;
  const size_t unit_count = wtf16.size();
  // The multiplication below must not overflow size_t.
  DCHECK(unit_count < std::numeric_limits<size_t>::max() / bytes_per_unit);
  return unit_count * bytes_per_unit;
}
// One-byte overload: code units are at most 0xff, which is below the
// surrogate range, so there is nothing to scan.
bool HasUnpairedSurrogate(base::Vector<const uint8_t> wtf16) {
  return false;
}
// Two-byte overload: delegates the scan over the code units to
// unibrow::Utf16::HasUnpairedSurrogate.
bool HasUnpairedSurrogate(base::Vector<const base::uc16> wtf16) {
  const base::uc16* const units = wtf16.begin();
  return unibrow::Utf16::HasUnpairedSurrogate(units, wtf16.size());
}
// TODO(12868): Consider unifying with api.cc:String::WriteUtf8.
//
// Encodes the code units in |wtf16| into the buffer beginning at
// |memory_start|, writing from byte |offset| onwards; |mem_size| is the size
// of that buffer in bytes. Returns MessageTemplate::kNone on success, or the
// message of the trap to raise:
//  - kWasmTrapMemOutOfBounds if the encoded bytes would not fit;
//  - kWasmTrapStringIsolatedSurrogate if |policy| is kWtf8PolicyReject and
//    the input has an unpaired surrogate.
// The bounds check runs before the policy check, and nothing is written when
// either check fails.
template <typename T>
MessageTemplate EncodeWtf8(char* memory_start, uint32_t offset, size_t mem_size,
                           base::Vector<const T> wtf16,
                           wasm::StringRefWtf8Policy policy) {
  // Try the cheap worst-case estimate first; the exact size is only measured
  // when that estimate does not fit.
  const bool worst_case_fits =
      base::IsInBounds<size_t>(offset, MaxEncodedSize(wtf16), mem_size);
  if (!worst_case_fits) {
    const bool exact_size_fits =
        base::IsInBounds<size_t>(offset, MeasureWtf8(wtf16), mem_size);
    if (!exact_size_fits) return MessageTemplate::kWasmTrapMemOutOfBounds;
  }

  switch (policy) {
    case wasm::kWtf8PolicyReject:
      if (HasUnpairedSurrogate(wtf16)) {
        return MessageTemplate::kWasmTrapStringIsolatedSurrogate;
      }
      break;
    case wasm::kWtf8PolicyAccept:
    case wasm::kWtf8PolicyReplace:
      break;
    default:
      UNREACHABLE();
  }
  // Only the "replace" policy asks the encoder to substitute invalid input.
  const bool replace_invalid = (policy == wasm::kWtf8PolicyReplace);

  char* cursor = memory_start + offset;
  int previous = unibrow::Utf16::kNoPreviousCharacter;
  for (const T* it = wtf16.begin(); it != wtf16.end(); ++it) {
    const T unit = *it;
    // Encode() returns the number of bytes it produced for this code unit.
    cursor += unibrow::Utf8::Encode(cursor, unit, previous, replace_invalid);
    previous = unit;
  }
  return MessageTemplate::kNone;
}
} // namespace } // namespace
RUNTIME_FUNCTION(Runtime_WasmStringMeasureUtf8) { RUNTIME_FUNCTION(Runtime_WasmStringMeasureUtf8) {
...@@ -982,5 +1033,39 @@ RUNTIME_FUNCTION(Runtime_WasmStringMeasureWtf8) { ...@@ -982,5 +1033,39 @@ RUNTIME_FUNCTION(Runtime_WasmStringMeasureWtf8) {
return *isolate->factory()->NewNumberFromInt(length); return *isolate->factory()->NewNumberFromInt(length);
} }
// Runtime implementation of string.encode_wtf8.
// Arguments: (instance, memory index, policy, string, offset). Flattens the
// string and encodes it into the instance's memory at |offset|. Throws a Wasm
// error if the encoder reports one; otherwise returns a placeholder Smi.
RUNTIME_FUNCTION(Runtime_WasmStringEncodeWtf8) {
  ClearThreadInWasmScope flag_scope(isolate);
  DCHECK_EQ(5, args.length());
  HandleScope scope(isolate);

  Handle<WasmInstanceObject> instance = args.at<WasmInstanceObject>(0);
  uint32_t memory_index = args.positive_smi_value_at(1);
  uint32_t raw_policy = args.positive_smi_value_at(2);
  Handle<String> input = args.at<String>(3);
  uint32_t dst_offset = NumberToUint32(args[4]);

  // Only memory 0 is expected here.
  DCHECK_EQ(memory_index, 0);
  USE(memory_index);
  DCHECK(raw_policy <= wasm::kLastWtf8Policy);

  char* mem_base = reinterpret_cast<char*>(instance->memory_start());
  size_t mem_bytes = instance->memory_size();
  auto policy = static_cast<wasm::StringRefWtf8Policy>(raw_policy);

  Handle<String> flat_string = String::Flatten(isolate, input);
  MessageTemplate status = MessageTemplate::kNone;
  {
    // The flat content is only used inside this no-GC scope.
    DisallowGarbageCollection no_gc;
    String::FlatContent flat = flat_string->GetFlatContent(no_gc);
    if (flat.IsOneByte()) {
      status = EncodeWtf8(mem_base, dst_offset, mem_bytes,
                          flat.ToOneByteVector(), policy);
    } else {
      status = EncodeWtf8(mem_base, dst_offset, mem_bytes,
                          flat.ToUC16Vector(), policy);
    }
  }

  if (status == MessageTemplate::kNone) {
    return Smi::zero();  // Unused.
  }
  return ThrowWasmError(isolate, status);
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
...@@ -616,7 +616,8 @@ namespace internal { ...@@ -616,7 +616,8 @@ namespace internal {
F(WasmStringNewWtf16, 4, 1) \ F(WasmStringNewWtf16, 4, 1) \
F(WasmStringConst, 2, 1) \ F(WasmStringConst, 2, 1) \
F(WasmStringMeasureUtf8, 1, 1) \ F(WasmStringMeasureUtf8, 1, 1) \
F(WasmStringMeasureWtf8, 1, 1) F(WasmStringMeasureWtf8, 1, 1) \
F(WasmStringEncodeWtf8, 5, 1)
#define FOR_EACH_INTRINSIC_WASM_TEST(F, I) \ #define FOR_EACH_INTRINSIC_WASM_TEST(F, I) \
F(DeserializeWasmModule, 2, 1) \ F(DeserializeWasmModule, 2, 1) \
......
...@@ -179,6 +179,9 @@ class V8_EXPORT_PRIVATE Utf8 { ...@@ -179,6 +179,9 @@ class V8_EXPORT_PRIVATE Utf8 {
// The maximum size a single UTF-16 code unit may take up when encoded as // The maximum size a single UTF-16 code unit may take up when encoded as
// UTF-8. // UTF-8.
static const unsigned kMax16BitCodeUnitSize = 3; static const unsigned kMax16BitCodeUnitSize = 3;
// The maximum size a single UTF-16 code unit known to be in the range
// [0,0xff] may take up when encoded as UTF-8. Values in [0x80,0xff] need a
// two-byte UTF-8 sequence; values below 0x80 need a single byte.
static const unsigned kMax8BitCodeUnitSize = 2;
static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor);
using Utf8IncrementalBuffer = uint32_t; using Utf8IncrementalBuffer = uint32_t;
......
...@@ -6132,8 +6132,38 @@ class LiftoffCompiler { ...@@ -6132,8 +6132,38 @@ class LiftoffCompiler {
void StringEncodeWtf8(FullDecoder* decoder, void StringEncodeWtf8(FullDecoder* decoder,
const EncodeWtf8Immediate<validate>& imm, const EncodeWtf8Immediate<validate>& imm,
const Value& str, const Value& address) { const Value& str, const Value& offset) {
UNIMPLEMENTED(); LiftoffRegList pinned;
LiftoffAssembler::VarState& offset_var =
__ cache_state()->stack_state.end()[-1];
LiftoffRegister string_reg = pinned.set(
__ LoadToRegister(__ cache_state()->stack_state.end()[-2], pinned));
MaybeEmitNullCheck(decoder, string_reg.gp(), pinned, str.type);
LiftoffAssembler::VarState string_var(kRef, string_reg, 0);
LiftoffRegister memory_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(memory_reg, imm.memory.index);
LiftoffAssembler::VarState memory_var(kPointerKind, memory_reg, 0);
LiftoffRegister policy_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(policy_reg, static_cast<int32_t>(imm.policy.value));
LiftoffAssembler::VarState policy_var(kPointerKind, policy_reg, 0);
CallRuntimeStub(WasmCode::kWasmStringEncodeWtf8,
MakeSig::Params(kRef, kI32, kSmiKind, kSmiKind),
{
string_var,
offset_var,
memory_var,
policy_var,
},
decoder->position());
__ DropValues(2);
RegisterDebugSideTableEntry(decoder, DebugSideTableBuilder::kDidSpill);
} }
void StringEncodeWtf16(FullDecoder* decoder, void StringEncodeWtf16(FullDecoder* decoder,
......
...@@ -798,15 +798,16 @@ struct StringConstImmediate { ...@@ -798,15 +798,16 @@ struct StringConstImmediate {
template <Decoder::ValidateFlag validate> template <Decoder::ValidateFlag validate>
struct Wtf8PolicyImmediate { struct Wtf8PolicyImmediate {
uint8_t value; StringRefWtf8Policy value;
const uint32_t length = 1; const uint32_t length = 1;
Wtf8PolicyImmediate(Decoder* decoder, const byte* pc) { Wtf8PolicyImmediate(Decoder* decoder, const byte* pc) {
value = decoder->read_u8<validate>(pc, "wtf8 policy"); uint8_t u8 = decoder->read_u8<validate>(pc, "wtf8 policy");
if (!VALIDATE(value <= kLastWtf8Policy)) { if (!VALIDATE(u8 <= kLastWtf8Policy)) {
DecodeError<validate>( DecodeError<validate>(
decoder, pc, "expected wtf8 policy 0, 1, or 2, but found %u", value); decoder, pc, "expected wtf8 policy 0, 1, or 2, but found %u", u8);
} }
value = static_cast<StringRefWtf8Policy>(u8);
} }
}; };
......
...@@ -1433,8 +1433,10 @@ class WasmGraphBuildingInterface { ...@@ -1433,8 +1433,10 @@ class WasmGraphBuildingInterface {
void StringEncodeWtf8(FullDecoder* decoder, void StringEncodeWtf8(FullDecoder* decoder,
const EncodeWtf8Immediate<validate>& imm, const EncodeWtf8Immediate<validate>& imm,
const Value& str, const Value& address) { const Value& str, const Value& offset) {
UNIMPLEMENTED(); builder_->StringEncodeWtf8(imm.memory.index, imm.policy.value, str.node,
NullCheckFor(str.type), offset.node,
decoder->position());
} }
void StringEncodeWtf16(FullDecoder* decoder, void StringEncodeWtf16(FullDecoder* decoder,
......
...@@ -128,6 +128,7 @@ struct WasmModule; ...@@ -128,6 +128,7 @@ struct WasmModule;
V(WasmStringConst) \ V(WasmStringConst) \
V(WasmStringMeasureUtf8) \ V(WasmStringMeasureUtf8) \
V(WasmStringMeasureWtf8) \ V(WasmStringMeasureWtf8) \
V(WasmStringEncodeWtf8) \
V(WasmStringViewWtf16GetCodeUnit) \ V(WasmStringViewWtf16GetCodeUnit) \
V(WasmStringViewWtf16Slice) V(WasmStringViewWtf16Slice)
......
...@@ -12,6 +12,7 @@ let kSig_i_w = makeSig([kWasmStringRef], [kWasmI32]); ...@@ -12,6 +12,7 @@ let kSig_i_w = makeSig([kWasmStringRef], [kWasmI32]);
let kSig_i_wi = makeSig([kWasmStringRef, kWasmI32], [kWasmI32]); let kSig_i_wi = makeSig([kWasmStringRef, kWasmI32], [kWasmI32]);
let kSig_w_wii = makeSig([kWasmStringRef, kWasmI32, kWasmI32], let kSig_w_wii = makeSig([kWasmStringRef, kWasmI32, kWasmI32],
[kWasmStringRef]); [kWasmStringRef]);
// Signature (stringref, i32) -> (): two parameters, no results.
let kSig_v_wi = makeSig([kWasmStringRef, kWasmI32], []);
function encodeWtf8(str) { function encodeWtf8(str) {
// String iterator coalesces surrogate pairs. // String iterator coalesces surrogate pairs.
...@@ -163,6 +164,17 @@ function makeWtf16TestDataSegment() { ...@@ -163,6 +164,17 @@ function makeWtf16TestDataSegment() {
} }
})(); })();
// Returns true if |codepoint| lies in the UTF-16 surrogate range
// [0xD800, 0xDFFF].
function IsSurrogate(codepoint) {
  const kFirstSurrogate = 0xD800;
  const kLastSurrogate = 0xDFFF;
  return codepoint >= kFirstSurrogate && codepoint <= kLastSurrogate;
}
// Returns true if |str| contains a surrogate that is not part of a
// well-formed surrogate pair.
function HasIsolatedSurrogate(str) {
  // Spreading a string uses the string iterator, which coalesces surrogate
  // pairs into one element; any element whose code point is still in the
  // surrogate range is therefore an isolated surrogate.
  return [...str].some((element) => IsSurrogate(element.codePointAt(0)));
}
(function TestStringMeasureUtf8AndWtf8() { (function TestStringMeasureUtf8AndWtf8() {
let builder = new WasmModuleBuilder(); let builder = new WasmModuleBuilder();
...@@ -194,14 +206,6 @@ function makeWtf16TestDataSegment() { ...@@ -194,14 +206,6 @@ function makeWtf16TestDataSegment() {
kGCPrefix, kExprStringMeasureWtf8 kGCPrefix, kExprStringMeasureWtf8
]); ]);
function HasIsolatedSurrogate(str) {
for (let codepoint of str) {
let value = codepoint.codePointAt(0);
if (0xD800 <= value && value <= 0xDFFF) return true;
}
return false;
}
let instance = builder.instantiate(); let instance = builder.instantiate();
for (let str of interestingStrings) { for (let str of interestingStrings) {
let wtf8 = encodeWtf8(str); let wtf8 = encodeWtf8(str);
...@@ -245,6 +249,107 @@ function makeWtf16TestDataSegment() { ...@@ -245,6 +249,107 @@ function makeWtf16TestDataSegment() {
WebAssembly.RuntimeError, "dereferencing a null pointer"); WebAssembly.RuntimeError, "dereferencing a null pointer");
})(); })();
// Exercises string.encode_wtf8 with each of the three policy immediates
// (0 = "utf8", 1 = "wtf8", 2 = "replace"), and checks the traps for a null
// string and for a destination range that does not fit in memory.
(function TestStringEncodeWtf8() {
let builder = new WasmModuleBuilder();
// The memory is exported so the test can inspect the bytes that were written.
builder.addMemory(1, undefined, true /* exported */, false);
// One exported function per policy: encode_utf8, encode_wtf8, encode_replace.
// Each takes (string, offset); the instruction immediates are the memory
// index (0) followed by the policy.
for (let [policy, name] of ["utf8", "wtf8", "replace"].entries()) {
builder.addFunction("encode_" + name, kSig_v_wi)
.exportFunc()
.addBody([
kExprLocalGet, 0,
kExprLocalGet, 1,
kGCPrefix, kExprStringEncodeWtf8, 0, policy,
]);
}
// Encodes a null stringref at offset 42; expected to trap.
builder.addFunction("encode_null", kSig_v_v)
.exportFunc()
.addBody([
kExprRefNull, kStringRefCode,
kExprI32Const, 42,
kGCPrefix, kExprStringEncodeWtf8, 0, 0,
]);
let instance = builder.instantiate();
let memory = new Uint8Array(instance.exports.memory.buffer);
// Zeroes memory[low, high).
function clearMemory(low, high) {
for (let i = low; i < high; i++) {
memory[i] = 0;
}
}
// Asserts that every byte in memory[low, high) is zero.
function assertMemoryBytesZero(low, high) {
for (let i = low; i < high; i++) {
assertEquals(0, memory[i]);
}
}
// Asserts that memory holds exactly |bytes| at |offset|, and that up to
// |slop| bytes on either side (clamped to the memory bounds) are still zero,
// i.e. the encoder wrote nothing outside the expected range.
function checkMemory(offset, bytes) {
let slop = 64;
assertMemoryBytesZero(Math.max(0, offset - slop), offset);
for (let i = 0; i < bytes.length; i++) {
assertEquals(bytes[i], memory[offset + i]);
}
assertMemoryBytesZero(offset + bytes.length,
Math.min(memory.length,
offset + bytes.length + slop));
}
// "wtf8" policy: the output must match the reference encoder. The offset is
// chosen so the encoding ends exactly at the last byte of memory.
for (let str of interestingStrings) {
let wtf8 = encodeWtf8(str);
let offset = memory.length - wtf8.length;
instance.exports.encode_wtf8(str, offset);
checkMemory(offset, wtf8);
// Restore the all-zero state that the later checkMemory calls rely on.
clearMemory(offset, offset + wtf8.length);
}
// "utf8" policy: strings with an isolated surrogate must trap; all others
// encode the same bytes as the reference encoder.
for (let str of interestingStrings) {
let offset = 0;
if (HasIsolatedSurrogate(str)) {
assertThrows(() => instance.exports.encode_utf8(str, offset),
WebAssembly.RuntimeError,
"Failed to encode string as UTF-8: contains unpaired surrogate");
} else {
let wtf8 = encodeWtf8(str);
instance.exports.encode_utf8(str, offset);
checkMemory(offset, wtf8);
clearMemory(offset, offset + wtf8.length);
}
}
// "replace" policy: the expected output is the encoding of the string with
// every isolated surrogate replaced by U+FFFD.
for (let str of interestingStrings) {
let offset = 42;
instance.exports.encode_replace(str, offset);
let replaced = '';
for (let codepoint of str) {
codepoint = codepoint.codePointAt(0);
if (IsSurrogate(codepoint)) codepoint = 0xFFFD;
replaced += String.fromCodePoint(codepoint);
}
// Sanity check on the expectation itself: without isolated surrogates the
// replacement must be the identity.
if (!HasIsolatedSurrogate(str)) assertEquals(str, replaced);
let wtf8 = encodeWtf8(replaced);
checkMemory(offset, wtf8);
clearMemory(offset, offset + wtf8.length);
}
// A null string traps.
assertThrows(() => instance.exports.encode_null(),
WebAssembly.RuntimeError, "dereferencing a null pointer");
// Spot-check that the end of memory is still all zeroes.
checkMemory(memory.length - 10, []);
// Out-of-bounds: start one byte too late for the encoding to fit. Every
// policy must trap, and the memory near the end must remain untouched.
for (let str of interestingStrings) {
let wtf8 = encodeWtf8(str);
let offset = memory.length - wtf8.length + 1;
assertThrows(() => instance.exports.encode_wtf8(str, offset),
WebAssembly.RuntimeError, "memory access out of bounds");
assertThrows(() => instance.exports.encode_utf8(str, offset),
WebAssembly.RuntimeError, "memory access out of bounds");
assertThrows(() => instance.exports.encode_replace(str, offset),
WebAssembly.RuntimeError, "memory access out of bounds");
checkMemory(offset - 1, []);
}
})();
(function TestStringViewWtf16() { (function TestStringViewWtf16() {
let builder = new WasmModuleBuilder(); let builder = new WasmModuleBuilder();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment