Commit 45fe356e, authored by Frank Tang, committed by Commit Bot

[Intl] Remove linebreak from Segmenter

Sync w/ https://github.com/tc39/proposal-intl-segmenter/pull/60

Bug: v8:8717
Change-Id: I98fe9e88367a611c14c82195222c8fe8a52e4bc8
Reviewed-on: https://chromium-review.googlesource.com/c/1422749
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org>
Commit-Queue: Frank Tang <ftang@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59016}
parent 437e134a
......@@ -23,7 +23,6 @@
V(_, h12_string, "h12") \
V(_, h23_string, "h23") \
V(_, h24_string, "h24") \
V(_, hard_string, "hard") \
V(_, hour_string, "hour") \
V(_, hour12_string, "hour12") \
V(_, hourCycle_string, "hourCycle") \
......@@ -36,7 +35,6 @@
V(_, integer_string, "integer") \
V(_, kana_string, "kana") \
V(_, letter_string, "letter") \
V(_, lineBreakStyle_string, "lineBreakStyle") \
V(_, list_string, "list") \
V(_, literal_string, "literal") \
V(_, locale_string, "locale") \
......@@ -64,7 +62,6 @@
V(_, SegmentIterator_string, "Segment Iterator") \
V(_, sensitivity_string, "sensitivity") \
V(_, sep_string, "sep") \
V(_, soft_string, "soft") \
V(_, strict_string, "strict") \
V(_, style_string, "style") \
V(_, term_string, "term") \
......
......@@ -2103,7 +2103,6 @@ void JSSegmenter::JSSegmenterPrint(std::ostream& os) { // NOLINT
JSObjectPrintHeader(os, *this, "JSSegmenter");
os << "\n - locale: " << Brief(locale());
os << "\n - granularity: " << GranularityAsString();
os << "\n - lineBreakStyle: " << LineBreakStyleAsString();
os << "\n - icu break iterator: " << Brief(icu_break_iterator());
JSObjectPrintBody(os, *this);
}
......
......@@ -37,8 +37,6 @@ Handle<String> JSSegmentIterator::GranularityAsString() const {
return GetReadOnlyRoots().word_string_handle();
case JSSegmenter::Granularity::SENTENCE:
return GetReadOnlyRoots().sentence_string_handle();
case JSSegmenter::Granularity::LINE:
return GetReadOnlyRoots().line_string_handle();
case JSSegmenter::Granularity::COUNT:
UNREACHABLE();
}
......@@ -106,18 +104,6 @@ Handle<Object> JSSegmentIterator::BreakType() const {
return GetReadOnlyRoots().word_string_handle();
}
return GetReadOnlyRoots().undefined_value_handle();
case JSSegmenter::Granularity::LINE:
if (rule_status >= UBRK_LINE_SOFT && rule_status < UBRK_LINE_SOFT_LIMIT) {
// soft line breaks, index at which a line break is acceptable but
// not required
return GetReadOnlyRoots().soft_string_handle();
}
if ((rule_status >= UBRK_LINE_HARD &&
rule_status < UBRK_LINE_HARD_LIMIT)) {
// hard, or mandatory line breaks
return GetReadOnlyRoots().hard_string_handle();
}
return GetReadOnlyRoots().undefined_value_handle();
case JSSegmenter::Granularity::SENTENCE:
if (rule_status >= UBRK_SENTENCE_TERM &&
rule_status < UBRK_SENTENCE_TERM_LIMIT) {
......
......@@ -77,7 +77,7 @@ class JSSegmentIterator : public JSObject {
// Bit positions in |flags|.
#define FLAGS_BIT_FIELDS(V, _) \
V(GranularityBits, JSSegmenter::Granularity, 3, _) \
V(GranularityBits, JSSegmenter::Granularity, 2, _) \
V(BreakTypeSetBits, bool, 1, _)
DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS)
#undef FLAGS_BIT_FIELDS
......@@ -85,7 +85,6 @@ class JSSegmentIterator : public JSObject {
STATIC_ASSERT(JSSegmenter::Granularity::GRAPHEME <= GranularityBits::kMax);
STATIC_ASSERT(JSSegmenter::Granularity::WORD <= GranularityBits::kMax);
STATIC_ASSERT(JSSegmenter::Granularity::SENTENCE <= GranularityBits::kMax);
STATIC_ASSERT(JSSegmenter::Granularity::LINE <= GranularityBits::kMax);
// [flags] Bit field containing various flags about the function.
DECL_INT_ACCESSORS(flags)
......
......@@ -26,17 +26,6 @@ ACCESSORS(JSSegmenter, icu_break_iterator, Managed<icu::BreakIterator>,
kICUBreakIteratorOffset)
SMI_ACCESSORS(JSSegmenter, flags, kFlagsOffset)
// Writes |line_break_style| into the LineBreakStyleBits portion of |flags|,
// leaving the remaining flag bits as they were.
inline void JSSegmenter::set_line_break_style(LineBreakStyle line_break_style) {
  // COUNT is only a bound, never a storable value.
  DCHECK_GT(LineBreakStyle::COUNT, line_break_style);
  set_flags(LineBreakStyleBits::update(flags(), line_break_style));
}
// Reads the line break style back out of the LineBreakStyleBits portion of
// |flags|.
inline JSSegmenter::LineBreakStyle JSSegmenter::line_break_style() const {
  const int current_flags = flags();
  return LineBreakStyleBits::decode(current_flags);
}
inline void JSSegmenter::set_granularity(Granularity granularity) {
DCHECK_GT(Granularity::COUNT, granularity);
int hints = flags();
......
......@@ -23,18 +23,10 @@
namespace v8 {
namespace internal {
// Translates a line break style name ("strict", "normal" or "loose") into the
// matching LineBreakStyle value. Any other string reaches UNREACHABLE().
JSSegmenter::LineBreakStyle JSSegmenter::GetLineBreakStyle(const char* str) {
  struct NamedStyle {
    const char* name;
    JSSegmenter::LineBreakStyle style;
  };
  // Checked in this order, first match wins.
  static const NamedStyle kNamedStyles[] = {
      {"strict", JSSegmenter::LineBreakStyle::STRICT},
      {"normal", JSSegmenter::LineBreakStyle::NORMAL},
      {"loose", JSSegmenter::LineBreakStyle::LOOSE},
  };
  for (const NamedStyle& candidate : kNamedStyles) {
    if (strcmp(str, candidate.name) == 0) return candidate.style;
  }
  UNREACHABLE();
}
// Maps a granularity name to the corresponding JSSegmenter::Granularity value.
// The names compared against are "grapheme", "word", "sentence" and "line";
// the comparisons are exact (strcmp), and any other string falls through to
// UNREACHABLE().
JSSegmenter::Granularity JSSegmenter::GetGranularity(const char* str) {
if (strcmp(str, "grapheme") == 0) return JSSegmenter::Granularity::GRAPHEME;
if (strcmp(str, "word") == 0) return JSSegmenter::Granularity::WORD;
if (strcmp(str, "sentence") == 0) return JSSegmenter::Granularity::SENTENCE;
if (strcmp(str, "line") == 0) return JSSegmenter::Granularity::LINE;
UNREACHABLE();
}
......@@ -72,25 +64,11 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSSegmenter>());
Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
// 8. Set opt.[[lb]] to lineBreakStyle.
// 9. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]],
// requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
Intl::ResolvedLocale r =
Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
requested_locales, matcher, {"lb"});
// 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", «
// "strict", "normal", "loose" », "normal").
Maybe<LineBreakStyle> maybe_line_break_style =
Intl::GetStringOption<LineBreakStyle>(
isolate, options, "lineBreakStyle", "Intl.Segmenter",
{"strict", "normal", "loose"},
{LineBreakStyle::STRICT, LineBreakStyle::NORMAL,
LineBreakStyle::LOOSE},
LineBreakStyle::NORMAL);
MAYBE_RETURN(maybe_line_break_style, MaybeHandle<JSSegmenter>());
LineBreakStyle line_break_style_enum = maybe_line_break_style.FromJust();
requested_locales, matcher, {});
// 10. Set segmenter.[[Locale]] to the value of r.[[Locale]].
Handle<String> locale_str =
......@@ -98,12 +76,11 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
segmenter_holder->set_locale(*locale_str);
// 13. Let granularity be ? GetOption(options, "granularity", "string", «
// "grapheme", "word", "sentence", "line" », "grapheme").
// "grapheme", "word", "sentence" », "grapheme").
Maybe<Granularity> maybe_granularity = Intl::GetStringOption<Granularity>(
isolate, options, "granularity", "Intl.Segmenter",
{"grapheme", "word", "sentence", "line"},
{Granularity::GRAPHEME, Granularity::WORD, Granularity::SENTENCE,
Granularity::LINE},
{"grapheme", "word", "sentence"},
{Granularity::GRAPHEME, Granularity::WORD, Granularity::SENTENCE},
Granularity::GRAPHEME);
MAYBE_RETURN(maybe_granularity, MaybeHandle<JSSegmenter>());
Granularity granularity_enum = maybe_granularity.FromJust();
......@@ -111,14 +88,6 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
// 14. Set segmenter.[[SegmenterGranularity]] to granularity.
segmenter_holder->set_granularity(granularity_enum);
// 15. If granularity is "line",
if (granularity_enum == Granularity::LINE) {
// a. Set segmenter.[[SegmenterLineBreakStyle]] to r.[[lb]].
segmenter_holder->set_line_break_style(line_break_style_enum);
} else {
segmenter_holder->set_line_break_style(LineBreakStyle::NOTSET);
}
icu::Locale icu_locale = r.icu_locale;
DCHECK(!icu_locale.isBogus());
......@@ -138,21 +107,6 @@ MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
icu_break_iterator.reset(
icu::BreakIterator::createSentenceInstance(icu_locale, status));
break;
case Granularity::LINE: {
// 15. If granularity is "line",
// a. Set segmenter.[[SegmenterLineBreakStyle]] to r.[[lb]].
const char* key = uloc_toLegacyKey("lb");
CHECK_NOT_NULL(key);
const char* value =
uloc_toLegacyType(key, segmenter_holder->LineBreakStyleAsCString());
CHECK_NOT_NULL(value);
UErrorCode status = U_ZERO_ERROR;
icu_locale.setKeywordValue(key, value, status);
CHECK(U_SUCCESS(status));
icu_break_iterator.reset(
icu::BreakIterator::createLineInstance(icu_locale, status));
break;
}
case Granularity::COUNT:
UNREACHABLE();
}
......@@ -185,49 +139,16 @@ Handle<JSObject> JSSegmenter::ResolvedOptions(
// Internal Slot Property
// [[Locale]] "locale"
// [[SegmenterGranularity]] "granularity"
// [[SegmenterLineBreakStyle]] "lineBreakStyle"
Handle<String> locale(segmenter_holder->locale(), isolate);
JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
NONE);
JSObject::AddProperty(isolate, result, factory->granularity_string(),
segmenter_holder->GranularityAsString(), NONE);
if (segmenter_holder->line_break_style() != LineBreakStyle::NOTSET) {
JSObject::AddProperty(isolate, result, factory->lineBreakStyle_string(),
segmenter_holder->LineBreakStyleAsString(), NONE);
}
// 5. Return options.
return result;
}
const char* JSSegmenter::LineBreakStyleAsCString() const {
switch (line_break_style()) {
case LineBreakStyle::STRICT:
return "strict";
case LineBreakStyle::NORMAL:
return "normal";
case LineBreakStyle::LOOSE:
return "loose";
case LineBreakStyle::COUNT:
case LineBreakStyle::NOTSET:
UNREACHABLE();
}
}
// Returns the stored line break style as a read-only-roots string handle
// ("strict", "normal" or "loose"). NOTSET and COUNT have no string form and
// reach UNREACHABLE().
Handle<String> JSSegmenter::LineBreakStyleAsString() const {
  const LineBreakStyle style = line_break_style();
  switch (style) {
    case LineBreakStyle::NOTSET:
    case LineBreakStyle::COUNT:
      UNREACHABLE();
    case LineBreakStyle::LOOSE:
      return GetReadOnlyRoots().loose_string_handle();
    case LineBreakStyle::NORMAL:
      return GetReadOnlyRoots().normal_string_handle();
    case LineBreakStyle::STRICT:
      return GetReadOnlyRoots().strict_string_handle();
  }
}
Handle<String> JSSegmenter::GranularityAsString() const {
switch (granularity()) {
case Granularity::GRAPHEME:
......@@ -236,8 +157,6 @@ Handle<String> JSSegmenter::GranularityAsString() const {
return GetReadOnlyRoots().word_string_handle();
case Granularity::SENTENCE:
return GetReadOnlyRoots().sentence_string_handle();
case Granularity::LINE:
return GetReadOnlyRoots().line_string_handle();
case Granularity::COUNT:
UNREACHABLE();
}
......
......@@ -41,8 +41,6 @@ class JSSegmenter : public JSObject {
static std::set<std::string> GetAvailableLocales();
Handle<String> LineBreakStyleAsString() const;
const char* LineBreakStyleAsCString() const;
Handle<String> GranularityAsString() const;
DECL_CAST(JSSegmenter)
......@@ -52,21 +50,6 @@ class JSSegmenter : public JSObject {
DECL_ACCESSORS(icu_break_iterator, Managed<icu::BreakIterator>)
// LineBreakStyle: identifying the style used for line break.
//
// ecma402 #sec-segmenter-internal-slots
enum class LineBreakStyle {
NOTSET, // While the granularity is not LINE
STRICT, // CSS level 3 line-break=strict, e.g. treat CJ as NS
NORMAL, // CSS level 3 line-break=normal, e.g. treat CJ as ID, break before
// hyphens for ja,zh
LOOSE, // CSS level 3 line-break=loose
COUNT // Not a style: exclusive upper bound checked by set_line_break_style
};
inline void set_line_break_style(LineBreakStyle line_break_style);
inline LineBreakStyle line_break_style() const;
// Granularity: identifying the segmenter used.
//
// ecma402 #sec-segmenter-internal-slots
......@@ -74,27 +57,19 @@ class JSSegmenter : public JSObject {
GRAPHEME, // for character-breaks
WORD, // for word-breaks
SENTENCE, // for sentence-breaks
LINE, // for line-breaks
COUNT
};
inline void set_granularity(Granularity granularity);
inline Granularity granularity() const;
// Bit positions in |flags|.
#define FLAGS_BIT_FIELDS(V, _) \
V(LineBreakStyleBits, LineBreakStyle, 3, _) \
V(GranularityBits, Granularity, 3, _)
#define FLAGS_BIT_FIELDS(V, _) V(GranularityBits, Granularity, 2, _)
DEFINE_BIT_FIELDS(FLAGS_BIT_FIELDS)
#undef FLAGS_BIT_FIELDS
STATIC_ASSERT(LineBreakStyle::NOTSET <= LineBreakStyleBits::kMax);
STATIC_ASSERT(LineBreakStyle::STRICT <= LineBreakStyleBits::kMax);
STATIC_ASSERT(LineBreakStyle::NORMAL <= LineBreakStyleBits::kMax);
STATIC_ASSERT(LineBreakStyle::LOOSE <= LineBreakStyleBits::kMax);
STATIC_ASSERT(Granularity::GRAPHEME <= GranularityBits::kMax);
STATIC_ASSERT(Granularity::WORD <= GranularityBits::kMax);
STATIC_ASSERT(Granularity::SENTENCE <= GranularityBits::kMax);
STATIC_ASSERT(Granularity::LINE <= GranularityBits::kMax);
// [flags] Bit field containing various flags about the function.
DECL_INT_ACCESSORS(flags)
......@@ -115,7 +90,6 @@ class JSSegmenter : public JSObject {
#undef JS_SEGMENTER_FIELDS
private:
static LineBreakStyle GetLineBreakStyle(const char* str);
static Granularity GetGranularity(const char* str);
OBJECT_CONSTRUCTORS(JSSegmenter, JSObject);
......
......@@ -11,9 +11,6 @@ let invalid_lb = [
"keepall",
"none",
"standard",
];
let valid_lb= [
"strict",
"normal",
"loose",
......@@ -30,12 +27,3 @@ invalid_lb.forEach(function(lb) {
assertEquals("en", df.resolvedOptions().locale);
}
);
valid_lb.forEach(function(lb) {
locales.forEach(function(base) {
let l = base + "-u-lb-" + lb;
let df = new Intl.Segmenter([l + "-fo-obar"]);
assertEquals(l, df.resolvedOptions().locale);
});
}
);
......@@ -13,11 +13,8 @@ new Intl.Segmenter(['en-US'], {
get localeMatcher() {
assertEquals(0, getCount++);
},
get lineBreakStyle() {
assertEquals(1, getCount++);
},
get granularity() {
assertEquals(2, getCount++);
assertEquals(1, getCount++);
},
});
assertEquals(3, getCount);
assertEquals(2, getCount);
......@@ -42,7 +42,7 @@ assertDoesNotThrow(
() => new Intl.Segmenter(["sr"], { granularity: "grapheme" })
);
assertDoesNotThrow(() => new Intl.Segmenter(["sr"], { granularity: "line" }));
assertThrows(() => new Intl.Segmenter(["sr"], { granularity: "line" }), RangeError);
assertThrows(
() => new Intl.Segmenter(["sr"], { granularity: "standard" }),
......@@ -61,9 +61,8 @@ assertDoesNotThrow(
() => new Intl.Segmenter(["sr"], { lineBreakStyle: "loose" })
);
assertThrows(
() => new Intl.Segmenter(["sr"], { lineBreakStyle: "giant" }),
RangeError
assertDoesNotThrow(
() => new Intl.Segmenter(["sr"], { lineBreakStyle: "giant" })
);
assertDoesNotThrow(
......@@ -138,28 +137,28 @@ assertDoesNotThrow(
})
);
assertDoesNotThrow(
assertThrows(
() =>
new Intl.Segmenter(["sr"], {
granularity: "line",
lineBreakStyle: "loose"
})
}), RangeError
);
assertDoesNotThrow(
assertThrows(
() =>
new Intl.Segmenter(["sr"], {
granularity: "line",
lineBreakStyle: "normal"
})
}), RangeError
);
assertDoesNotThrow(
assertThrows(
() =>
new Intl.Segmenter(["sr"], {
granularity: "line",
lineBreakStyle: "strict"
})
}), RangeError
);
// propagate exception from getter
......@@ -172,14 +171,13 @@ assertThrows(
}),
TypeError
);
assertThrows(
assertDoesNotThrow(
() =>
new Intl.Segmenter(undefined, {
get lineBreakStyle() {
throw new TypeError("");
}
}),
TypeError
})
);
assertThrows(
() =>
......
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// A "line" segmenter created without a lineBreakStyle resolves it to 'normal'.
let segmenter = new Intl.Segmenter([], { granularity: "line" });
assertEquals("normal", segmenter.resolvedOptions().lineBreakStyle);

// With no options at all, lineBreakStyle is absent and granularity resolves
// to 'grapheme'.
segmenter = new Intl.Segmenter();
assertEquals(undefined, segmenter.resolvedOptions().lineBreakStyle);
assertEquals("grapheme", segmenter.resolvedOptions().granularity);

// Each row is [expected value, constructor options, resolved option to read].
// Rows are checked in order, each against a newly constructed "sr" segmenter.
for (const [expected, options, property] of [
  // lineBreakStyle only.
  [undefined, { lineBreakStyle: "strict" }, "lineBreakStyle"],
  ["grapheme", { lineBreakStyle: "strict" }, "granularity"],
  [undefined, { lineBreakStyle: "normal" }, "lineBreakStyle"],
  ["grapheme", { lineBreakStyle: "normal" }, "granularity"],
  [undefined, { lineBreakStyle: "loose" }, "lineBreakStyle"],
  ["grapheme", { lineBreakStyle: "loose" }, "granularity"],

  // granularity only.
  ["word", { granularity: "word" }, "granularity"],
  [undefined, { granularity: "word" }, "lineBreakStyle"],
  ["grapheme", { granularity: "grapheme" }, "granularity"],
  [undefined, { granularity: "grapheme" }, "lineBreakStyle"],
  ["sentence", { granularity: "sentence" }, "granularity"],
  [undefined, { granularity: "sentence" }, "lineBreakStyle"],
  ["line", { granularity: "line" }, "granularity"],
  ["normal", { granularity: "line" }, "lineBreakStyle"],

  // lineBreakStyle combined with "grapheme".
  ["grapheme", { lineBreakStyle: "loose", granularity: "grapheme" },
   "granularity"],
  [undefined, { lineBreakStyle: "loose", granularity: "grapheme" },
   "lineBreakStyle"],
  ["grapheme", { lineBreakStyle: "strict", granularity: "grapheme" },
   "granularity"],
  [undefined, { lineBreakStyle: "strict", granularity: "grapheme" },
   "lineBreakStyle"],
  ["grapheme", { lineBreakStyle: "normal", granularity: "grapheme" },
   "granularity"],
  [undefined, { lineBreakStyle: "normal", granularity: "grapheme" },
   "lineBreakStyle"],

  // lineBreakStyle combined with "word".
  ["word", { lineBreakStyle: "loose", granularity: "word" }, "granularity"],
  [undefined, { lineBreakStyle: "loose", granularity: "word" },
   "lineBreakStyle"],
  ["word", { lineBreakStyle: "strict", granularity: "word" }, "granularity"],
  [undefined, { lineBreakStyle: "strict", granularity: "word" },
   "lineBreakStyle"],
  ["word", { lineBreakStyle: "normal", granularity: "word" }, "granularity"],
  [undefined, { lineBreakStyle: "normal", granularity: "word" },
   "lineBreakStyle"],

  // lineBreakStyle combined with "sentence".
  ["sentence", { lineBreakStyle: "loose", granularity: "sentence" },
   "granularity"],
  [undefined, { lineBreakStyle: "loose", granularity: "sentence" },
   "lineBreakStyle"],
  ["sentence", { lineBreakStyle: "strict", granularity: "sentence" },
   "granularity"],
  [undefined, { lineBreakStyle: "strict", granularity: "sentence" },
   "lineBreakStyle"],
  ["sentence", { lineBreakStyle: "normal", granularity: "sentence" },
   "granularity"],

  // lineBreakStyle combined with "line".
  ["normal", { lineBreakStyle: "normal", granularity: "line" },
   "lineBreakStyle"],
  ["line", { lineBreakStyle: "loose", granularity: "line" }, "granularity"],
  ["loose", { lineBreakStyle: "loose", granularity: "line" },
   "lineBreakStyle"],
  ["line", { lineBreakStyle: "strict", granularity: "line" }, "granularity"],
  ["strict", { lineBreakStyle: "strict", granularity: "line" },
   "lineBreakStyle"],
  ["line", { lineBreakStyle: "normal", granularity: "line" }, "granularity"],
  ["normal", { lineBreakStyle: "normal", granularity: "line" },
   "lineBreakStyle"],
]) {
  assertEquals(
      expected,
      new Intl.Segmenter(["sr"], options).resolvedOptions()[property]);
}

// Each row is [expected resolved locale, requested locale list].
for (const [expected, requested] of [
  ["ar", ["ar"]],
  ["ar", ["ar", "en"]],
  ["fr", ["fr", "en"]],
  ["ar", ["xyz", "ar"]],
]) {
  assertEquals(expected, new Intl.Segmenter(requested).resolvedOptions().locale);
}
......@@ -5,7 +5,7 @@
// Flags: --harmony-intl-segmenter
const text = "Hello World, Test 123! Foo Bar. How are you?";
for (const granularity of ["grapheme", "word", "sentence", "line"]) {
for (const granularity of ["grapheme", "word", "sentence"]) {
const segmenter = new Intl.Segmenter("en", { granularity });
const iter = segmenter.segment(text);
......
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
// Break opportunities found per "<locale>-<lineBreakStyle>" key.
let breakCounts = {};

// Text known to produce different line break results between the modes.
// https://www.w3.org/TR/css-text-3/#propdef-line-break
const samples = [
  // Japanese small kana or the Katakana-Hiragana prolonged sound mark
  "あぁーぃーあーいーぁーぃー",
  // hyphens:
  // ‐ U+2010, – U+2013, 〜 U+301C, ゠ U+30A0
  "ABC‐DEF–GHI〜JKL゠MNO",
  // iteration marks:
  // 々 U+3005, 〻 U+303B, ゝ U+309D, ゞ U+309E, ヽ U+30FD, ヾ U+30FE
  "あ々あ〻あゝあゞあヽあヾあ",
  // centered punctuation marks:
  // ・ U+30FB, : U+FF1A, ; U+FF1B, ・ U+FF65, ‼ U+203C
  "ABC・DEF:GHI;JKL・MNO‼PQR",
  // centered punctuation marks:
  // ⁇ U+2047, ⁈ U+2048, ⁉ U+2049, ! U+FF01, ? U+FF1F
  "ABC⁇DEF⁈GHI⁉JKL!MNO?PQR",
];

for (const locale of ["en", "fr", "ja", "zh", "ko"]) {
  for (const lineBreakStyle of ["strict", "normal", "loose"]) {
    const seg = new Intl.Segmenter(
        [locale], {granularity: "line", lineBreakStyle: lineBreakStyle});
    // Sum the number of following() steps taken before each sample ends.
    const total = samples.reduce(function(count, text) {
      const iter = seg.segment(text);
      while (!iter.following()) {
        count++;
      }
      return count;
    }, 0);
    breakCounts[`${locale}-${lineBreakStyle}`] = total;
  }
}

// Japanese and Chinese: loose must find strictly more breaks than normal, and
// normal strictly more than strict.
for (const locale of ["ja", "zh"]) {
  assertTrue(breakCounts[locale + "-loose"] > breakCounts[locale + "-normal"]);
  assertTrue(breakCounts[locale + "-normal"] > breakCounts[locale + "-strict"]);
}

// English, French and Korean: loose finds at least as many breaks as normal...
for (const locale of ["en", "fr", "ko"]) {
  assertTrue(breakCounts[locale + "-loose"] >= breakCounts[locale + "-normal"]);
}
// ...and normal finds strictly more than strict.
for (const locale of ["en", "fr", "ko"]) {
  assertTrue(breakCounts[locale + "-normal"] > breakCounts[locale + "-strict"]);
}
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const seg = new Intl.Segmenter([], {granularity: "line"});

// Sample sentences in a range of scripts.
const samples = [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
];

// Walk each sample forwards with following() and check that the pieces between
// consecutive break positions reassemble into the original text.
samples.forEach(function(text) {
  const cursor = seg.segment(text);
  const pieces = [];
  let start = 0;
  for (;;) {
    if (cursor.following()) break;
    assertTrue(["soft", "hard"].includes(cursor.breakType), cursor.breakType);
    assertTrue(cursor.index >= 0);
    assertTrue(cursor.index <= text.length);
    // Every step must move strictly forwards.
    assertTrue(cursor.index > start);
    pieces.push(text.substring(start, cursor.index));
    start = cursor.index;
  }
  assertEquals(text, pieces.join(""));
});
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const seg = new Intl.Segmenter([], {granularity: "line"});

// Sample sentences in a range of scripts.
const samples = [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
];

// Iterate each sample with for-of while stepping a second, manually advanced
// iterator in lockstep; both must report the same segments and break types.
samples.forEach(function(text) {
  const pieces = [];
  const manual = seg.segment(text);
  let start = 0;
  for (const item of seg.segment(text)) {
    assertTrue(["soft", "hard"].includes(item.breakType), item.breakType);
    assertEquals("string", typeof item.segment);
    assertTrue(item.segment.length > 0);
    pieces.push(item.segment);

    // Step the manual iterator once; it must not have reached the end yet.
    assertFalse(manual.following());
    assertEquals(manual.breakType, item.breakType);
    assertEquals(text.substring(start, manual.index), item.segment);
    start = manual.index;
  }
  // After the for-of loop is exhausted, one more manual step hits the end.
  assertTrue(manual.following());
  assertEquals(text, pieces.join(''));
});
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const seg = new Intl.Segmenter([], {granularity: "line"});

// Sample sentences in a range of scripts.
const samples = [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
];

// Drive each sample through next() by hand and check every yielded value.
samples.forEach(function(text) {
  const cursor = seg.segment(text);
  const pieces = [];
  let lastIndex = -1;
  while (true) {
    const step = cursor.next();
    if (step.done) break;
    const item = step.value;
    assertTrue(["soft", "hard"].includes(cursor.breakType), cursor.breakType);
    assertEquals("string", typeof item.segment);
    assertTrue(item.segment.length > 0);
    pieces.push(item.segment);
    assertEquals("number", typeof item.index);
    // Reported indices must be strictly increasing.
    assertTrue(lastIndex < item.index);
    lastIndex = item.index;
  }
  assertEquals(text, pieces.join(''));
});
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const seg = new Intl.Segmenter([], {granularity: "line"});

// Sample sentences in a range of scripts.
const samples = [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
];

// Walk each sample backwards with preceding() and check that the pieces,
// collected front-first, reassemble into the original text.
samples.forEach(function(text) {
  const cursor = seg.segment(text);
  const pieces = [];
  let end = text.length;

  // First step: ask for the break preceding the end of the text.
  cursor.preceding(end);
  assertTrue(["soft", "hard"].includes(cursor.breakType), cursor.breakType);
  assertTrue(cursor.index >= 0);
  assertTrue(cursor.index < end);
  pieces.unshift(text.substring(cursor.index, end));
  end = cursor.index;

  // Remaining steps: keep moving back until preceding() returns true.
  for (;;) {
    if (cursor.preceding()) break;
    assertTrue(["soft", "hard"].includes(cursor.breakType), cursor.breakType);
    assertTrue(cursor.index >= 0);
    assertTrue(cursor.index <= text.length);
    // Every step must move strictly backwards.
    assertTrue(cursor.index < end);
    pieces.unshift(text.substring(cursor.index, end));
    end = cursor.index;
  }
  assertEquals(text, pieces.join(""));
});
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-intl-segmenter
const seg = new Intl.Segmenter([], {granularity: "line"});

// Sample sentences in a range of scripts.
const samples = [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
];

// An iterator that has not been advanced yet reports no break type and sits at
// index 0.
samples.forEach(function(text) {
  const fresh = seg.segment(text);
  assertEquals(undefined, fresh.breakType);
  assertEquals(0, fresh.index);
});
......@@ -506,6 +506,24 @@
'language/expressions/call/eval-spread-empty-leading': [FAIL],
'language/expressions/call/eval-spread-empty-trailing': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=8717
'intl402/Segmenter/constructor/constructor/options-granularity-valid': [FAIL],
'intl402/Segmenter/constructor/constructor/options-lineBreakStyle-invalid': [FAIL],
'intl402/Segmenter/constructor/constructor/options-lineBreakStyle-valid': [FAIL],
'intl402/Segmenter/constructor/constructor/options-order': [FAIL],
'intl402/Segmenter/constructor/constructor/options-throwing-getters': [FAIL],
'intl402/Segmenter/constructor/constructor/options-toobject-prototype': [FAIL],
'intl402/Segmenter/constructor/constructor/options-valid-combinations': [FAIL],
'intl402/Segmenter/iterator/granularity': [FAIL],
'intl402/Segmenter/prototype/resolvedOptions/order': [FAIL],
'intl402/Segmenter/prototype/resolvedOptions/type-with-lbs': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line-following': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line-following-modes': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line-iterable': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line-next': [FAIL],
'intl402/Segmenter/prototype/segment/segment-line-preceding': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=7472
'intl402/NumberFormat/currency-digits': [FAIL],
......
......@@ -307,44 +307,44 @@ KNOWN_MAPS = {
("RO_SPACE", 0x027d1): (173, "Tuple2Map"),
("RO_SPACE", 0x02871): (175, "ArrayBoilerplateDescriptionMap"),
("RO_SPACE", 0x02bb1): (163, "InterceptorInfoMap"),
("RO_SPACE", 0x050d1): (153, "AccessCheckInfoMap"),
("RO_SPACE", 0x05121): (154, "AccessorInfoMap"),
("RO_SPACE", 0x05171): (155, "AccessorPairMap"),
("RO_SPACE", 0x051c1): (156, "AliasedArgumentsEntryMap"),
("RO_SPACE", 0x05211): (157, "AllocationMementoMap"),
("RO_SPACE", 0x05261): (158, "AsmWasmDataMap"),
("RO_SPACE", 0x052b1): (159, "AsyncGeneratorRequestMap"),
("RO_SPACE", 0x05301): (160, "DebugInfoMap"),
("RO_SPACE", 0x05351): (161, "FunctionTemplateInfoMap"),
("RO_SPACE", 0x053a1): (162, "FunctionTemplateRareDataMap"),
("RO_SPACE", 0x053f1): (164, "InterpreterDataMap"),
("RO_SPACE", 0x05441): (165, "ModuleInfoEntryMap"),
("RO_SPACE", 0x05491): (166, "ModuleMap"),
("RO_SPACE", 0x054e1): (167, "ObjectTemplateInfoMap"),
("RO_SPACE", 0x05531): (168, "PromiseCapabilityMap"),
("RO_SPACE", 0x05581): (169, "PromiseReactionMap"),
("RO_SPACE", 0x055d1): (170, "PrototypeInfoMap"),
("RO_SPACE", 0x05621): (171, "ScriptMap"),
("RO_SPACE", 0x05671): (172, "StackFrameInfoMap"),
("RO_SPACE", 0x056c1): (174, "Tuple3Map"),
("RO_SPACE", 0x05711): (176, "WasmDebugInfoMap"),
("RO_SPACE", 0x05761): (177, "WasmExceptionTagMap"),
("RO_SPACE", 0x057b1): (178, "WasmExportedFunctionDataMap"),
("RO_SPACE", 0x05801): (179, "CallableTaskMap"),
("RO_SPACE", 0x05851): (180, "CallbackTaskMap"),
("RO_SPACE", 0x058a1): (181, "PromiseFulfillReactionJobTaskMap"),
("RO_SPACE", 0x058f1): (182, "PromiseRejectReactionJobTaskMap"),
("RO_SPACE", 0x05941): (183, "PromiseResolveThenableJobTaskMap"),
("RO_SPACE", 0x05991): (184, "WeakFactoryCleanupJobTaskMap"),
("RO_SPACE", 0x059e1): (185, "AllocationSiteWithWeakNextMap"),
("RO_SPACE", 0x05a31): (185, "AllocationSiteWithoutWeakNextMap"),
("RO_SPACE", 0x05a81): (219, "LoadHandler1Map"),
("RO_SPACE", 0x05ad1): (219, "LoadHandler2Map"),
("RO_SPACE", 0x05b21): (219, "LoadHandler3Map"),
("RO_SPACE", 0x05b71): (227, "StoreHandler0Map"),
("RO_SPACE", 0x05bc1): (227, "StoreHandler1Map"),
("RO_SPACE", 0x05c11): (227, "StoreHandler2Map"),
("RO_SPACE", 0x05c61): (227, "StoreHandler3Map"),
("RO_SPACE", 0x05081): (153, "AccessCheckInfoMap"),
("RO_SPACE", 0x050d1): (154, "AccessorInfoMap"),
("RO_SPACE", 0x05121): (155, "AccessorPairMap"),
("RO_SPACE", 0x05171): (156, "AliasedArgumentsEntryMap"),
("RO_SPACE", 0x051c1): (157, "AllocationMementoMap"),
("RO_SPACE", 0x05211): (158, "AsmWasmDataMap"),
("RO_SPACE", 0x05261): (159, "AsyncGeneratorRequestMap"),
("RO_SPACE", 0x052b1): (160, "DebugInfoMap"),
("RO_SPACE", 0x05301): (161, "FunctionTemplateInfoMap"),
("RO_SPACE", 0x05351): (162, "FunctionTemplateRareDataMap"),
("RO_SPACE", 0x053a1): (164, "InterpreterDataMap"),
("RO_SPACE", 0x053f1): (165, "ModuleInfoEntryMap"),
("RO_SPACE", 0x05441): (166, "ModuleMap"),
("RO_SPACE", 0x05491): (167, "ObjectTemplateInfoMap"),
("RO_SPACE", 0x054e1): (168, "PromiseCapabilityMap"),
("RO_SPACE", 0x05531): (169, "PromiseReactionMap"),
("RO_SPACE", 0x05581): (170, "PrototypeInfoMap"),
("RO_SPACE", 0x055d1): (171, "ScriptMap"),
("RO_SPACE", 0x05621): (172, "StackFrameInfoMap"),
("RO_SPACE", 0x05671): (174, "Tuple3Map"),
("RO_SPACE", 0x056c1): (176, "WasmDebugInfoMap"),
("RO_SPACE", 0x05711): (177, "WasmExceptionTagMap"),
("RO_SPACE", 0x05761): (178, "WasmExportedFunctionDataMap"),
("RO_SPACE", 0x057b1): (179, "CallableTaskMap"),
("RO_SPACE", 0x05801): (180, "CallbackTaskMap"),
("RO_SPACE", 0x05851): (181, "PromiseFulfillReactionJobTaskMap"),
("RO_SPACE", 0x058a1): (182, "PromiseRejectReactionJobTaskMap"),
("RO_SPACE", 0x058f1): (183, "PromiseResolveThenableJobTaskMap"),
("RO_SPACE", 0x05941): (184, "WeakFactoryCleanupJobTaskMap"),
("RO_SPACE", 0x05991): (185, "AllocationSiteWithWeakNextMap"),
("RO_SPACE", 0x059e1): (185, "AllocationSiteWithoutWeakNextMap"),
("RO_SPACE", 0x05a31): (219, "LoadHandler1Map"),
("RO_SPACE", 0x05a81): (219, "LoadHandler2Map"),
("RO_SPACE", 0x05ad1): (219, "LoadHandler3Map"),
("RO_SPACE", 0x05b21): (227, "StoreHandler0Map"),
("RO_SPACE", 0x05b71): (227, "StoreHandler1Map"),
("RO_SPACE", 0x05bc1): (227, "StoreHandler2Map"),
("RO_SPACE", 0x05c11): (227, "StoreHandler3Map"),
("MAP_SPACE", 0x00139): (1057, "ExternalMap"),
("MAP_SPACE", 0x00189): (1073, "JSMessageObjectMap"),
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment