Commit ebaf3fee authored by Frank Tang's avatar Frank Tang Committed by Commit Bot

Improve test cases for Intl.Segmenter

Fix containing() when n points to the tail (low) surrogate of a surrogate pair.

Bug: v8:6891
Change-Id: I4d7e4083ba409eecaefaf2ec5c67b7226bd7c97c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2348208
Commit-Queue: Frank Tang <ftang@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69363}
parent 1b35c0fa
......@@ -77,6 +77,9 @@ MaybeHandle<Object> JSSegments::Containing(Isolate* isolate,
return isolate->factory()->undefined_value();
}
// n may point to the surrogate tail- adjust it back to the lead.
n = segments->unicode_string().raw()->getChar32Start(n);
icu::BreakIterator* break_iterator = segments->icu_break_iterator().raw();
// 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
int32_t start_index =
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Checks Segments.prototype.containing() with the default (grapheme)
// granularity: every UTF-16 code unit index inside a grapheme cluster must
// report that cluster together with the index at which the cluster starts.
const segmenter = new Intl.Segmenter();
const waving_hand_light_skin_tone = "\uD83D\uDC4B\uD83C\uDFFB";
const man_light_skin_tone_red_hair =
    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83E\uDDB0";

// UTF-16 code unit layout of the input:
//    0- 3  Thai text (a 3-unit cluster followed by a 1-unit cluster)
//    4     SPACE
//    5-11  "Ame\u0301lie" (the "e" + combining acute occupies 7-8)
//   12-13  Han
//   14-15  surrogate pair \uD800\uDCB0
//   16     SPACE
//   17-20  waving hand + light skin tone
//   21-27  man + light skin tone + ZWJ + red hair
const input = "ซิ่ง Ame\u0301lie台北\uD800\uDCB0 " + waving_hand_light_skin_tone +
    man_light_skin_tone_red_hair;
const segments = segmenter.segment(input);

// Indices below 0 and at or beyond the end of the input have no segment.
for (const n of [-1, input.length, input.length + 1]) {
  assertEquals(undefined, segments.containing(n));
}

// Each entry is [expected segment, expected start index]. The entries are
// listed in input order and together cover every code unit of the input.
const expected_graphemes = [
  // Thai base consonant with modifiers
  ["ซิ่", 0],
  // Basic Thai
  ["ง", 3],
  // SPACE
  [" ", 4],
  // ASCII
  ["A", 5],
  ["m", 6],
  // ASCII with modifier
  ["e\u0301", 7],
  // ASCII
  ["l", 9],
  ["i", 10],
  ["e", 11],
  // Han
  ["台", 12],
  ["北", 13],
  // Surrogate pair
  ["𐂰", 14],
  // SPACE
  [" ", 16],
  // Emoji modifier: U+1F44B U+1F3FB
  [waving_hand_light_skin_tone, 17],
  // Emoji modifier sequence: U+1F468 U+1F3FB U+200D U+1F9B0
  [man_light_skin_tone_red_hair, 21],
];

for (const [segment, index] of expected_graphemes) {
  // Probe every code unit that belongs to the cluster, not only its start.
  for (let n = index; n < index + segment.length; n++) {
    assertEquals({segment, index, input}, segments.containing(n));
  }
}
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Checks that iterating a Segments object while another iteration over the
// same object is still in progress works: the inner next() calls must not
// disturb the position of the outer iteration.
const segmenter = new Intl.Segmenter();
const input = "ABCD";
const segments = segmenter.segment(input);

const visited = [];
const outer = segments[Symbol.iterator]();
for (let o = outer.next(); !o.done; o = outer.next()) {
  // Start a fresh inner iteration while the outer one is suspended.
  const inner = segments[Symbol.iterator]();
  for (let i = inner.next(); !i.done; i = inner.next()) {
    visited.push(o.value.segment);
    visited.push(i.value.segment);
  }
  // Mark the end of one full inner pass.
  visited.push(":");
}

// Every outer segment is paired with every inner segment, in order.
assertEquals("AAABACAD:BABBBCBD:CACBCCCD:DADBDCDD:", visited.join(""));
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Checks that calls to containing(), made before and during an iteration of
// the same Segments object, do not change what the iteration's next() yields.
const segmenter = new Intl.Segmenter();
const man_light_skin_tone_red_hair =
    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83E\uDDB0";
const input = "ABCD" + man_light_skin_tone_red_hair;
const segments = segmenter.segment(input);

// The input starts with four single-unit ASCII segments; every later code
// unit belongs to the emoji sequence that starts at index 4.
const ascii_length = 4;
const expected_iteration = "A:B:C:D:" + man_light_skin_tone_red_hair + ":";

for (let n = 0; n < input.length; n++) {
  const in_ascii = n < ascii_length;
  const expected = {
    segment: in_ascii ? input[n] : man_light_skin_tone_red_hair,
    index: in_ascii ? n : ascii_length,
    input
  };
  assertEquals(expected, segments.containing(n));

  let joined = "";
  for (const {segment} of segments) {
    joined += segment + ":";
    // Query containing(n) in the middle of the iteration; whatever n is, the
    // iteration must still walk all segments from the start, in order.
    assertEquals(expected, segments.containing(n));
  }
  assertEquals(expected_iteration, joined);
}
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Checks Segments.prototype.containing() with word granularity: every UTF-16
// code unit index inside a word segment must report that segment, the index
// at which the segment starts, and the expected isWordLike flag.
const segmenter = new Intl.Segmenter(undefined, {granularity: 'word'});
const waving_hand_light_skin_tone = "\uD83D\uDC4B\uD83C\uDFFB";
const man_light_skin_tone_red_hair =
    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83E\uDDB0";

// UTF-16 code unit layout of the input:
//    0- 3  Thai word
//    4     SPACE
//    5-11  "Ame\u0301lie"
//   12-13  Han word
//   14-15  surrogate pair \uD800\uDCB0
//   16     SPACE
//   17-20  waving hand + light skin tone
//   21-27  man + light skin tone + ZWJ + red hair
const input = "ซิ่ง Ame\u0301lie台北\uD800\uDCB0 " + waving_hand_light_skin_tone +
    man_light_skin_tone_red_hair;
const segments = segmenter.segment(input);

// Test less than 0
assertEquals(undefined, segments.containing(-1));
// Test longer than input
assertEquals(undefined, segments.containing(input.length));
assertEquals(undefined, segments.containing(input.length + 1));

// Start index of the segment that is expected next.
let pos = 0;

// Asserts that every code unit index covered by |segment|, which is expected
// to start at |pos|, maps back to that segment with the given |isWordLike|
// flag, then advances |pos| past the segment.
function expectSegment(segment, isWordLike) {
  for (let i = pos; i < pos + segment.length; i++) {
    assertEquals({segment, index: pos, input, isWordLike},
                 segments.containing(i));
  }
  pos += segment.length;
}

// Test modifier in Thai
const thai = "ซิ่ง";
expectSegment(thai, true);
// Test SPACE
expectSegment(" ", false);
// Test Latin with modifier
const latin_with_modifier = "Ame\u0301lie";
expectSegment(latin_with_modifier, true);
// Test Han
const taipei = "台北";
expectSegment(taipei, true);
// Test Surrogate pair. Both the lead and the tail code unit are probed; the
// previous version of this test always queried index 14 and therefore never
// exercised the surrogate-tail index.
const surrogate = "\uD800\uDCB0";
expectSegment(surrogate, true);
// Test SPACE
expectSegment(" ", false);
// Test Emoji modifier: U+1F44B U+1F3FB
expectSegment(waving_hand_light_skin_tone, false);
// Test Emoji modifiers sequence: U+1F468 U+1F3FB U+200D U+1F9B0
expectSegment(man_light_skin_tone_red_hair, false);

// The expected segments must account for every code unit of the input.
assertEquals(input.length, pos);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment