Commit e8610ab8 authored by Andy Wingo, committed by V8 LUCI CQ

[stringrefs] Implement stringview_wtf8.encode

Bug: v8:12868
Change-Id: I714fffec248114a7ff61479f122a7df538e8e8d5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3755140
Commit-Queue: Andy Wingo <wingo@igalia.com>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81666}
parent 902759b8
...@@ -55,6 +55,8 @@ extern runtime WasmStringEncodeWtf8Array( ...@@ -55,6 +55,8 @@ extern runtime WasmStringEncodeWtf8Array(
extern runtime WasmStringEncodeWtf16( extern runtime WasmStringEncodeWtf16(
Context, WasmInstanceObject, Smi, String, Number, Smi, Smi): JSAny; Context, WasmInstanceObject, Smi, String, Number, Smi, Smi): JSAny;
extern runtime WasmStringAsWtf8(Context, String): ByteArray; extern runtime WasmStringAsWtf8(Context, String): ByteArray;
// Runtime entry point used by the WasmStringViewWtf8Encode builtin. After the
// context and instance, the builtin passes: the WTF-8 policy (Smi), the view's
// ByteArray, and then the destination address, start offset and end offset,
// each converted to a Number.
extern runtime WasmStringViewWtf8Encode(
Context, WasmInstanceObject, Smi, ByteArray, Number, Number, Number): JSAny;
} }
namespace unsafe { namespace unsafe {
...@@ -941,7 +943,7 @@ macro IsWtf8CodepointStart(view: ByteArray, pos: uint32): bool { ...@@ -941,7 +943,7 @@ macro IsWtf8CodepointStart(view: ByteArray, pos: uint32): bool {
// doesn't start with 0b10xxxxxx. // doesn't start with 0b10xxxxxx.
return (view.bytes[Convert<uintptr>(pos)] & 0xc0) != 0x80; return (view.bytes[Convert<uintptr>(pos)] & 0xc0) != 0x80;
} }
builtin WasmWtf8PositionTreatment(view: ByteArray, pos: uint32): uint32 { macro AlignWtf8PositionForward(view: ByteArray, pos: uint32): uint32 {
const length = Unsigned(SmiToInt32(view.length)); const length = Unsigned(SmiToInt32(view.length));
if (pos >= length) return length; if (pos >= length) return length;
...@@ -958,20 +960,50 @@ builtin WasmWtf8PositionTreatment(view: ByteArray, pos: uint32): uint32 { ...@@ -958,20 +960,50 @@ builtin WasmWtf8PositionTreatment(view: ByteArray, pos: uint32): uint32 {
return pos + 3; return pos + 3;
} }
macro AlignWtf8PositionBackward(view: ByteArray, pos: uint32): uint32 {
  // Round pos down to a codepoint boundary: the result is the largest
  // offset that is <= pos and starts a codepoint.
  // Preconditions: pos is in [0, view.length) and view holds well-formed
  // WTF-8. Under those preconditions, if none of pos, pos - 1 and pos - 2
  // starts a codepoint, pos - 3 is taken to be the start without checking.
  if (!IsWtf8CodepointStart(view, pos)) {
    const oneBack: uint32 = pos - 1;
    if (IsWtf8CodepointStart(view, oneBack)) return oneBack;
    const twoBack: uint32 = pos - 2;
    if (IsWtf8CodepointStart(view, twoBack)) return twoBack;
    return pos - 3;
  }
  return pos;
}
builtin WasmStringViewWtf8Advance( builtin WasmStringViewWtf8Advance(
view: ByteArray, pos: uint32, bytes: uint32): uint32 { view: ByteArray, pos: uint32, bytes: uint32): uint32 {
const clampedPos = AlignWtf8PositionForward(view, pos);
if (bytes == 0) return clampedPos;
const length = Unsigned(SmiToInt32(view.length)); const length = Unsigned(SmiToInt32(view.length));
const clampedPos = WasmWtf8PositionTreatment(view, pos);
if (bytes >= length - clampedPos) return length; if (bytes >= length - clampedPos) return length;
const newPos = clampedPos + bytes; return AlignWtf8PositionBackward(view, clampedPos + bytes);
}
// Return the highest offset that starts a codepoint which is not greater struct NewPositionAndBytesWritten {
// than newPos. newPosition: uintptr;
if (IsWtf8CodepointStart(view, newPos)) return newPos; bytesWritten: uintptr;
if (IsWtf8CodepointStart(view, newPos - 1)) return newPos - 1; }
if (IsWtf8CodepointStart(view, newPos - 2)) return newPos - 2; builtin WasmStringViewWtf8Encode(
return newPos - 3; addr: uint32, pos: uint32, bytes: uint32, view: ByteArray, memory: Smi,
policy: Smi): NewPositionAndBytesWritten {
const start = WasmStringViewWtf8Advance(view, pos, 0);
const end = WasmStringViewWtf8Advance(view, start, bytes);
const instance = LoadInstanceFromFrame();
const context = LoadContextFromInstance(instance);
// kMaxArgs in code-assembler.cc:CallRunTimeImpl is currently limited
// to 6 arguments when calling a runtime function. Throw away the
// memory argument for now; when we need multi-memory we can bump
// kMaxArgs.
dcheck(memory == SmiFromInt32(0));
// Always call out to run-time, to catch invalid addr.
runtime::WasmStringViewWtf8Encode(
context, instance, policy, view, WasmUint32ToNumber(addr),
WasmUint32ToNumber(start), WasmUint32ToNumber(end));
return NewPositionAndBytesWritten{
newPosition: Convert<uintptr>(end),
bytesWritten: Convert<uintptr>(end - start)
};
} }
transitioning builtin WasmStringViewWtf16GetCodeUnit( transitioning builtin WasmStringViewWtf16GetCodeUnit(
string: String, offset: uint32): uint32 { string: String, offset: uint32): uint32 {
......
...@@ -5913,6 +5913,20 @@ Node* WasmGraphBuilder::StringViewWtf8Advance(Node* view, ...@@ -5913,6 +5913,20 @@ Node* WasmGraphBuilder::StringViewWtf8Advance(Node* view,
Operator::kNoDeopt, view, pos, bytes); Operator::kNoDeopt, view, pos, bytes);
} }
// Builds the graph for stringview_wtf8.encode by calling the
// WasmStringViewWtf8Encode builtin. The builtin produces two values, which
// are stored through the out-parameters: projection 0 into *next_pos and
// projection 1 into *bytes_written.
void WasmGraphBuilder::StringViewWtf8Encode(
    uint32_t memory, wasm::StringRefWtf8Policy policy, Node* view,
    CheckForNull null_check, Node* addr, Node* pos, Node* bytes,
    Node** next_pos, Node** bytes_written, wasm::WasmCodePosition position) {
  // Only insert the null assertion when the caller asked for one.
  Node* checked_view =
      (null_check == kWithNullCheck) ? AssertNotNull(view, position) : view;

  // The memory index and the policy are passed to the builtin as Smi
  // constants.
  Node* results = gasm_->CallBuiltin(
      Builtin::kWasmStringViewWtf8Encode, Operator::kNoDeopt, addr, pos, bytes,
      checked_view, gasm_->SmiConstant(memory), gasm_->SmiConstant(policy));

  *next_pos = gasm_->Projection(0, results);
  *bytes_written = gasm_->Projection(1, results);
}
Node* WasmGraphBuilder::StringViewWtf16GetCodeUnit( Node* WasmGraphBuilder::StringViewWtf16GetCodeUnit(
Node* string, CheckForNull null_check, Node* offset, Node* string, CheckForNull null_check, Node* offset,
wasm::WasmCodePosition position) { wasm::WasmCodePosition position) {
......
...@@ -571,6 +571,11 @@ class WasmGraphBuilder { ...@@ -571,6 +571,11 @@ class WasmGraphBuilder {
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
Node* StringViewWtf8Advance(Node* view, CheckForNull null_check, Node* pos, Node* StringViewWtf8Advance(Node* view, CheckForNull null_check, Node* pos,
Node* bytes, wasm::WasmCodePosition position); Node* bytes, wasm::WasmCodePosition position);
// Builds the graph for stringview_wtf8.encode. |next_pos| and
// |bytes_written| are out-parameters that receive the two result nodes.
void StringViewWtf8Encode(uint32_t memory, wasm::StringRefWtf8Policy policy,
Node* view, CheckForNull null_check, Node* addr,
Node* pos, Node* bytes, Node** next_pos,
Node** bytes_written,
wasm::WasmCodePosition position);
Node* StringViewWtf16GetCodeUnit(Node* string, CheckForNull null_check, Node* StringViewWtf16GetCodeUnit(Node* string, CheckForNull null_check,
Node* offset, Node* offset,
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
......
...@@ -1235,5 +1235,54 @@ RUNTIME_FUNCTION(Runtime_WasmStringAsWtf8) { ...@@ -1235,5 +1235,54 @@ RUNTIME_FUNCTION(Runtime_WasmStringAsWtf8) {
return *array; return *array;
} }
// Runtime part of stringview_wtf8.encode. Copies bytes [start, end) of the
// view's ByteArray into the instance's linear memory at addr, applying the
// requested surrogate policy:
//   - accept:  bytes are copied verbatim;
//   - reject:  traps if the slice contains an encoded surrogate;
//   - replace: every encoded surrogate is overwritten with U+FFFD.
// Traps with a memory-out-of-bounds error if the destination range does not
// fit in memory. The returned Smi is not used by the caller.
RUNTIME_FUNCTION(Runtime_WasmStringViewWtf8Encode) {
  ClearThreadInWasmScope flag_scope(isolate);
  DCHECK_EQ(6, args.length());
  HandleScope scope(isolate);

  // Unpack the arguments: instance, policy, view bytes, addr, start, end.
  WasmInstanceObject instance = WasmInstanceObject::cast(args[0]);
  uint32_t raw_policy = args.positive_smi_value_at(1);
  Handle<ByteArray> view_bytes(ByteArray::cast(args[2]), isolate);
  uint32_t addr = NumberToUint32(args[3]);
  uint32_t start = NumberToUint32(args[4]);
  uint32_t end = NumberToUint32(args[5]);

  // The calling builtin has already aligned and clamped start/end.
  DCHECK(raw_policy <= wasm::kLastWtf8Policy);
  DCHECK_LE(start, end);
  DCHECK(base::IsInBounds<size_t>(start, end - start, view_bytes->length()));

  auto policy = static_cast<wasm::StringRefWtf8Policy>(raw_policy);
  size_t byte_count = end - start;

  // The destination range must lie entirely within linear memory.
  if (!base::IsInBounds<size_t>(addr, byte_count, instance.memory_size())) {
    return ThrowWasmError(isolate, MessageTemplate::kWasmTrapMemOutOfBounds);
  }

  const byte* source = reinterpret_cast<const byte*>(
      view_bytes->GetDataStartAddress() + start);
  byte* destination = reinterpret_cast<byte*>(instance.memory_start()) + addr;

  // Surrogates only matter when the policy is not "accept"; the scan is done
  // before any byte is written so that a rejecting trap leaves memory intact.
  std::vector<size_t> surrogate_offsets;
  const bool needs_scan = policy != wasm::kWtf8PolicyAccept;
  if (needs_scan) {
    unibrow::Wtf8::ScanForSurrogates({source, byte_count}, &surrogate_offsets);
    if (policy == wasm::kWtf8PolicyReject && !surrogate_offsets.empty()) {
      return ThrowWasmError(isolate,
                            MessageTemplate::kWasmTrapStringIsolatedSurrogate);
    }
  }

  MemCopy(destination, source, byte_count);

  // Under the "replace" policy, patch each copied surrogate in place with the
  // encoding of the replacement character.
  for (size_t offset : surrogate_offsets) {
    DCHECK_LT(offset, byte_count);
    DCHECK_EQ(policy, wasm::kWtf8PolicyReplace);
    unibrow::Utf8::Encode(reinterpret_cast<char*>(destination + offset),
                          unibrow::Utf8::kBadChar, 0, false);
  }

  // Unused.
  return Smi(0);
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
...@@ -619,7 +619,8 @@ namespace internal { ...@@ -619,7 +619,8 @@ namespace internal {
F(WasmStringEncodeWtf8, 5, 1) \ F(WasmStringEncodeWtf8, 5, 1) \
F(WasmStringEncodeWtf16, 6, 1) \ F(WasmStringEncodeWtf16, 6, 1) \
F(WasmStringEncodeWtf8Array, 4, 1) \ F(WasmStringEncodeWtf8Array, 4, 1) \
F(WasmStringAsWtf8, 1, 1) F(WasmStringAsWtf8, 1, 1) \
F(WasmStringViewWtf8Encode, 6, 1)
#define FOR_EACH_INTRINSIC_WASM_TEST(F, I) \ #define FOR_EACH_INTRINSIC_WASM_TEST(F, I) \
F(DeserializeWasmModule, 2, 1) \ F(DeserializeWasmModule, 2, 1) \
......
...@@ -5,8 +5,12 @@ ...@@ -5,8 +5,12 @@
// This file was generated at 2014-10-08 15:25:47.940335 // This file was generated at 2014-10-08 15:25:47.940335
#include "src/strings/unicode.h" #include "src/strings/unicode.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <vector>
#include "src/strings/unicode-inl.h" #include "src/strings/unicode-inl.h"
#if V8_ENABLE_WEBASSEMBLY #if V8_ENABLE_WEBASSEMBLY
...@@ -255,6 +259,30 @@ bool Wtf8::ValidateEncoding(const byte* bytes, size_t length) { ...@@ -255,6 +259,30 @@ bool Wtf8::ValidateEncoding(const byte* bytes, size_t length) {
} }
return state == State::kAccept; return state == State::kAccept;
} }
// Precondition: valid WTF-8.
//
// Appends to |surrogate_offsets| the byte offset (relative to the start of
// |wtf8|) of every encoded surrogate codepoint in |wtf8|, in increasing
// order. Existing contents of |surrogate_offsets| are left in place.
void Wtf8::ScanForSurrogates(const v8::base::Vector<const byte>& wtf8,
                             std::vector<size_t>* surrogate_offsets) {
  // A surrogate codepoint is encoded in a three-byte sequence:
  //
  //   0xED [0xA0,0xBF] [0x80,0xBF]
  //
  // If the first byte is 0xED, you already have a 50% chance of the value being
  // a surrogate; you just have to check the second byte.  (There are
  // three-byte non-surrogates starting with 0xED whose second byte is in
  // [0x80,0x9F].)  Could speed this up with SWAR; most likely case is that no
  // byte in the array is 0xED.
  const byte kWtf8SurrogateFirstByte = 0xED;
  const byte kWtf8SurrogateSecondByteHighBit = 0x20;
  const size_t kWtf8SurrogateLength = 3;

  // Only offsets at which a complete three-byte sequence still fits are
  // candidates. This keeps the look-ahead read of wtf8[i + 1] in bounds even
  // if the input ends in a stray 0xED, and guarantees that a caller replacing
  // three bytes at a reported offset stays inside the scanned range. For
  // valid WTF-8 the reported offsets are unchanged.
  const size_t size = wtf8.size();
  if (size < kWtf8SurrogateLength) return;
  const size_t last_candidate = size - kWtf8SurrogateLength;

  for (size_t i = 0; i <= last_candidate; i++) {
    if (wtf8[i] == kWtf8SurrogateFirstByte &&
        (wtf8[i + 1] & kWtf8SurrogateSecondByteHighBit)) {
      // Record the byte offset of the encoded surrogate.
      surrogate_offsets->push_back(i);
    }
  }
}
#endif // V8_ENABLE_WEBASSEMBLY #endif // V8_ENABLE_WEBASSEMBLY
// Uppercase: point.category == 'Lu' // Uppercase: point.category == 'Lu'
......
...@@ -6,7 +6,9 @@ ...@@ -6,7 +6,9 @@
#define V8_STRINGS_UNICODE_H_ #define V8_STRINGS_UNICODE_H_
#include <sys/types.h> #include <sys/types.h>
#include "src/base/bit-field.h" #include "src/base/bit-field.h"
#include "src/base/vector.h"
#include "src/common/globals.h" #include "src/common/globals.h"
#include "src/third_party/utf8-decoder/utf8-decoder.h" #include "src/third_party/utf8-decoder/utf8-decoder.h"
/** /**
...@@ -230,6 +232,9 @@ class V8_EXPORT_PRIVATE Wtf8 { ...@@ -230,6 +232,9 @@ class V8_EXPORT_PRIVATE Wtf8 {
// this function checks for a valid "generalized UTF-8" sequence, with the // this function checks for a valid "generalized UTF-8" sequence, with the
// additional constraint that surrogate pairs are not allowed. // additional constraint that surrogate pairs are not allowed.
static bool ValidateEncoding(const byte* str, size_t length); static bool ValidateEncoding(const byte* str, size_t length);
// Appends to |surrogate_offsets| the byte offset of every encoded surrogate
// codepoint found in |wtf8|. Precondition: |wtf8| is valid WTF-8.
static void ScanForSurrogates(const v8::base::Vector<const byte>& wtf8,
std::vector<size_t>* surrogate_offsets);
}; };
#endif // V8_ENABLE_WEBASSEMBLY #endif // V8_ENABLE_WEBASSEMBLY
......
...@@ -6662,7 +6662,49 @@ class LiftoffCompiler { ...@@ -6662,7 +6662,49 @@ class LiftoffCompiler {
const Value& view, const Value& addr, const Value& view, const Value& addr,
const Value& pos, const Value& bytes, const Value& pos, const Value& bytes,
Value* next_pos, Value* bytes_written) { Value* next_pos, Value* bytes_written) {
UNIMPLEMENTED(); LiftoffRegList pinned;
LiftoffAssembler::VarState& bytes_var =
__ cache_state()->stack_state.end()[-1];
LiftoffAssembler::VarState& pos_var =
__ cache_state()->stack_state.end()[-2];
LiftoffAssembler::VarState& addr_var =
__ cache_state()->stack_state.end()[-3];
LiftoffRegister view_reg = pinned.set(
__ LoadToRegister(__ cache_state()->stack_state.end()[-4], pinned));
MaybeEmitNullCheck(decoder, view_reg.gp(), pinned, view.type);
LiftoffAssembler::VarState view_var(kRef, view_reg, 0);
LiftoffRegister memory_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(memory_reg, imm.memory.index);
LiftoffAssembler::VarState memory_var(kSmiKind, memory_reg, 0);
LiftoffRegister policy_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(policy_reg, static_cast<int32_t>(imm.policy.value));
LiftoffAssembler::VarState policy_var(kSmiKind, policy_reg, 0);
CallRuntimeStub(WasmCode::kWasmStringViewWtf8Encode,
MakeSig::Returns(kI32, kI32)
.Params(kI32, kI32, kI32, kRef, kSmiKind, kSmiKind),
{
addr_var,
pos_var,
bytes_var,
view_var,
memory_var,
policy_var,
},
decoder->position());
__ DropValues(4);
RegisterDebugSideTableEntry(decoder, DebugSideTableBuilder::kDidSpill);
LiftoffRegister next_pos_reg(kReturnRegister0);
__ PushRegister(kI32, next_pos_reg);
LiftoffRegister bytes_written_reg(kReturnRegister1);
__ PushRegister(kI32, bytes_written_reg);
} }
void StringViewWtf8Slice(FullDecoder* decoder, const Value& view, void StringViewWtf8Slice(FullDecoder* decoder, const Value& view,
......
...@@ -1509,7 +1509,10 @@ class WasmGraphBuildingInterface { ...@@ -1509,7 +1509,10 @@ class WasmGraphBuildingInterface {
const Value& view, const Value& addr, const Value& view, const Value& addr,
const Value& pos, const Value& bytes, const Value& pos, const Value& bytes,
Value* next_pos, Value* bytes_written) { Value* next_pos, Value* bytes_written) {
UNIMPLEMENTED(); builder_->StringViewWtf8Encode(
imm.memory.index, imm.policy.value, view.node, NullCheckFor(view.type),
addr.node, pos.node, bytes.node, &next_pos->node, &bytes_written->node,
decoder->position());
} }
void StringViewWtf8Slice(FullDecoder* decoder, const Value& view, void StringViewWtf8Slice(FullDecoder* decoder, const Value& view,
......
...@@ -141,7 +141,8 @@ struct WasmModule; ...@@ -141,7 +141,8 @@ struct WasmModule;
V(WasmStringEncodeWtf8Array) \ V(WasmStringEncodeWtf8Array) \
V(WasmStringEncodeWtf16Array) \ V(WasmStringEncodeWtf16Array) \
V(WasmStringAsWtf8) \ V(WasmStringAsWtf8) \
V(WasmStringViewWtf8Advance) V(WasmStringViewWtf8Advance) \
V(WasmStringViewWtf8Encode)
// Sorted, disjoint and non-overlapping memory regions. A region is of the // Sorted, disjoint and non-overlapping memory regions. A region is of the
// form [start, end). So there's no [start, end), [end, other_end), // form [start, end). So there's no [start, end), [end, other_end),
......
...@@ -14,6 +14,8 @@ let kSig_i_wii = makeSig([kWasmStringRef, kWasmI32, kWasmI32], [kWasmI32]); ...@@ -14,6 +14,8 @@ let kSig_i_wii = makeSig([kWasmStringRef, kWasmI32, kWasmI32], [kWasmI32]);
let kSig_i_ww = makeSig([kWasmStringRef, kWasmStringRef], [kWasmI32]); let kSig_i_ww = makeSig([kWasmStringRef, kWasmStringRef], [kWasmI32]);
let kSig_i_wiii = makeSig([kWasmStringRef, kWasmI32, kWasmI32, kWasmI32], let kSig_i_wiii = makeSig([kWasmStringRef, kWasmI32, kWasmI32, kWasmI32],
[kWasmI32]); [kWasmI32]);
let kSig_ii_wiii = makeSig([kWasmStringRef, kWasmI32, kWasmI32, kWasmI32],
[kWasmI32, kWasmI32]);
let kSig_w_wii = makeSig([kWasmStringRef, kWasmI32, kWasmI32], let kSig_w_wii = makeSig([kWasmStringRef, kWasmI32, kWasmI32],
[kWasmStringRef]); [kWasmStringRef]);
let kSig_w_ww = makeSig([kWasmStringRef, kWasmStringRef], [kWasmStringRef]); let kSig_w_ww = makeSig([kWasmStringRef, kWasmStringRef], [kWasmStringRef]);
...@@ -45,11 +47,50 @@ function encodeWtf8(str) { ...@@ -45,11 +47,50 @@ function encodeWtf8(str) {
return out; return out;
} }
// Decode the valid WTF-8 bytes of `wtf8` in the half-open range
// [start, end) and return the string they represent.
function decodeWtf8(wtf8, start, end) {
  const pieces = [];
  let cursor = start;
  while (cursor < end) {
    const lead = wtf8[cursor];
    // Callers promise valid WTF-8 with start and end on codepoint
    // boundaries. That is only weakly checked here: decoding must never
    // begin on a continuation byte (0b10xxxxxx).
    if ((lead & 0xC0) == 0x80) {
      throw new Error('invalid wtf8');
    }
    // Number of continuation bytes announced by the lead byte.
    const trailing =
        lead <= 0x7F ? 0 : lead <= 0xDF ? 1 : lead <= 0xEF ? 2 : 3;
    // Payload bits of the lead byte: 0x1f, 0x0f or 0x07 depending on the
    // sequence length; an ASCII byte is its own codepoint.
    let cp = trailing == 0 ? lead : lead & (0x3f >> trailing);
    // Each continuation byte contributes six more low-order bits.
    for (let k = 1; k <= trailing; k++) {
      cp = (cp << 6) | (wtf8[cursor + k] & 0x3f);
    }
    cursor += trailing + 1;
    pieces.push(String.fromCodePoint(cp));
  }
  // The last codepoint must end exactly at `end`.
  assertEquals(cursor, end);
  return pieces.join('');
}
let interestingStrings = ['', let interestingStrings = ['',
'ascii', 'ascii',
'latin \xa9 1', 'latin \xa9 1',
'two \ucccc byte', 'two \ucccc byte',
'surrogate \ud800\udc000 pair', 'surrogate \ud800\udc00 pair',
'isolated \ud800 leading', 'isolated \ud800 leading',
'isolated \udc00 trailing', 'isolated \udc00 trailing',
'\ud800 isolated leading at beginning', '\ud800 isolated leading at beginning',
...@@ -783,6 +824,8 @@ function makeWtf16TestDataSegment() { ...@@ -783,6 +824,8 @@ function makeWtf16TestDataSegment() {
(function TestStringViewWtf8() { (function TestStringViewWtf8() {
let builder = new WasmModuleBuilder(); let builder = new WasmModuleBuilder();
builder.addMemory(1, undefined, true /* exported */, false);
builder.addFunction("advance", kSig_i_wii) builder.addFunction("advance", kSig_i_wii)
.exportFunc() .exportFunc()
.addBody([ .addBody([
...@@ -802,6 +845,32 @@ function makeWtf16TestDataSegment() { ...@@ -802,6 +845,32 @@ function makeWtf16TestDataSegment() {
kGCPrefix, kExprStringViewWtf8Advance kGCPrefix, kExprStringViewWtf8Advance
]); ]);
for (let [name, policy] of Object.entries({utf8: kWtf8PolicyReject,
wtf8: kWtf8PolicyAccept,
replace: kWtf8PolicyReplace})) {
builder.addFunction(`encode_${name}`, kSig_ii_wiii)
.exportFunc()
.addBody([
kExprLocalGet, 0,
kGCPrefix, kExprStringAsWtf8,
kExprLocalGet, 1,
kExprLocalGet, 2,
kExprLocalGet, 3,
kGCPrefix, kExprStringViewWtf8Encode, 0, policy
]);
}
builder.addFunction("encode_null", kSig_v_v)
.exportFunc()
.addBody([
kExprRefNull, kStringViewWtf8Code,
kExprI32Const, 0,
kExprI32Const, 0,
kExprI32Const, 0,
kGCPrefix, kExprStringViewWtf8Encode, 0, kWtf8PolicyAccept,
kExprDrop,
kExprDrop
]);
function Wtf8StartsCodepoint(wtf8, offset) { function Wtf8StartsCodepoint(wtf8, offset) {
return (wtf8[offset] & 0xc0) != 0x80; return (wtf8[offset] & 0xc0) != 0x80;
} }
...@@ -821,6 +890,7 @@ function makeWtf16TestDataSegment() { ...@@ -821,6 +890,7 @@ function makeWtf16TestDataSegment() {
} }
let instance = builder.instantiate(); let instance = builder.instantiate();
let memory = new Uint8Array(instance.exports.memory.buffer);
for (let pos = 0; pos < "ascii".length; pos++) { for (let pos = 0; pos < "ascii".length; pos++) {
assertEquals(pos + 1, instance.exports.advance("ascii", pos, 1)); assertEquals(pos + 1, instance.exports.advance("ascii", pos, 1));
...@@ -836,8 +906,8 @@ function makeWtf16TestDataSegment() { ...@@ -836,8 +906,8 @@ function makeWtf16TestDataSegment() {
instance.exports.advance(str, 0, wtf8.length + 1)); instance.exports.advance(str, 0, wtf8.length + 1));
assertEquals(wtf8.length, assertEquals(wtf8.length,
instance.exports.advance(str, wtf8.length + 1, 0)); instance.exports.advance(str, wtf8.length + 1, 0));
for (let pos = 0; pos < wtf8.length; pos++) { for (let pos = 0; pos <= wtf8.length; pos++) {
for (let bytes = 0; bytes < wtf8.length - pos; bytes++) { for (let bytes = 0; bytes <= wtf8.length - pos; bytes++) {
assertEquals( assertEquals(
CodepointStart(wtf8, Wtf8PositionTreatment(wtf8, pos) + bytes), CodepointStart(wtf8, Wtf8PositionTreatment(wtf8, pos) + bytes),
instance.exports.advance(str, pos, bytes)); instance.exports.advance(str, pos, bytes));
...@@ -845,6 +915,87 @@ function makeWtf16TestDataSegment() { ...@@ -845,6 +915,87 @@ function makeWtf16TestDataSegment() {
} }
} }
// Encode `str` from byte position `start` for at most `length` bytes using
// the exported `encode_<variant>` function, and check that:
//  - the returned [next position, bytes written] pair is as expected,
//  - exactly the WTF-8 bytes of `slice` land in memory at several offsets,
//  - memory around the written region stays zero,
//  - writing past the end of memory traps and leaves memory untouched.
function checkEncoding(variant, str, slice, start, length) {
  const all_bytes = encodeWtf8(str);
  const bytes = encodeWtf8(slice);

  // Zero the memory range [low, high).
  const clearMemory = (low, high) => {
    let i = low;
    while (i < high) {
      memory[i] = 0;
      i++;
    }
  };

  // Assert that every byte in the memory range [low, high) is zero.
  const assertMemoryBytesZero = (low, high) => {
    let i = low;
    while (i < high) {
      assertEquals(0, memory[i]);
      i++;
    }
  };

  // Assert that memory holds `expected` at `offset`, with up to 64 zero bytes
  // of padding on either side (clamped to the bounds of memory).
  const checkMemory = (offset, expected) => {
    const slop = 64;
    const before = Math.max(0, offset - slop);
    assertMemoryBytesZero(before, offset);
    expected.forEach((byte, i) => {
      assertEquals(byte, memory[offset + i]);
    });
    const after = offset + expected.length;
    assertMemoryBytesZero(after, Math.min(memory.length, after + slop));
  };

  const encode = instance.exports[`encode_${variant}`];
  const expected_start = Wtf8PositionTreatment(all_bytes, start);
  const expected_end =
      CodepointStart(all_bytes, expected_start + bytes.length);
  const expected_result = [expected_end, expected_end - expected_start];

  // Write at the start of memory, somewhere in the middle, and flush against
  // the end of memory.
  const offsets = [0, 42, memory.length - bytes.length];
  for (const offset of offsets) {
    assertArrayEquals(expected_result, encode(str, offset, start, length));
    checkMemory(offset, bytes);
    clearMemory(offset, offset + bytes.length);
  }

  // Two bytes further on, the write no longer fits and must trap without
  // having touched memory.
  const overflowing_offset = memory.length - bytes.length + 2;
  assertThrows(() => encode(str, overflowing_offset, start, length),
               WebAssembly.RuntimeError, "memory access out of bounds");
  checkMemory(memory.length - bytes.length - 2, []);
}
checkEncoding('utf8', "fox", "f", 0, 1);
checkEncoding('utf8', "fox", "fo", 0, 2);
checkEncoding('utf8', "fox", "fox", 0, 3);
checkEncoding('utf8', "fox", "fox", 0, 300);
checkEncoding('utf8', "fox", "", 1, 0);
checkEncoding('utf8', "fox", "o", 1, 1);
checkEncoding('utf8', "fox", "ox", 1, 2);
checkEncoding('utf8', "fox", "ox", 1, 200);
checkEncoding('utf8', "fox", "", 2, 0);
checkEncoding('utf8', "fox", "x", 2, 1);
checkEncoding('utf8', "fox", "x", 2, 2);
checkEncoding('utf8', "fox", "", 3, 0);
checkEncoding('utf8', "fox", "", 3, 1_000_000_000);
checkEncoding('utf8', "fox", "", 1_000_000_000, 1_000_000_000);
checkEncoding('utf8', "fox", "", 100, 100);
for (let str of interestingStrings) {
let wtf8 = encodeWtf8(str);
for (let pos = 0; pos <= wtf8.length; pos++) {
for (let bytes = 0; bytes <= wtf8.length - pos; bytes++) {
let start = Wtf8PositionTreatment(wtf8, pos);
let end = CodepointStart(wtf8, start + bytes);
let expected = decodeWtf8(wtf8, start, end);
checkEncoding('wtf8', str, expected, pos, bytes);
if (HasIsolatedSurrogate(expected)) {
assertThrows(() => instance.exports.encode_utf8(str, 0, pos, bytes),
WebAssembly.RuntimeError,
"Failed to encode string as UTF-8: " +
"contains unpaired surrogate");
checkEncoding('replace', str,
ReplaceIsolatedSurrogates(expected), pos, bytes);
} else {
checkEncoding('utf8', str, expected, pos, bytes);
checkEncoding('replace', str, expected, pos, bytes);
}
}
}
}
assertThrows(() => instance.exports.advance_null(), assertThrows(() => instance.exports.advance_null(),
WebAssembly.RuntimeError, "dereferencing a null pointer"); WebAssembly.RuntimeError, "dereferencing a null pointer");
assertThrows(() => instance.exports.encode_null(),
WebAssembly.RuntimeError, "dereferencing a null pointer");
})(); })();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment