Commit 49569e5a authored by Frank Tang's avatar Frank Tang Committed by Commit Bot

[Intl] Simplified ListFormat implementation

Use ICU64 new API formatStringsToValue

Bug: v8:8836
Change-Id: I7399a301b2536f331b1df1e1845adf2e533bafb9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1560659
Commit-Queue: Frank Tang <ftang@chromium.org>
Reviewed-by: 's avatarSathya Gunasekaran <gsathya@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60856}
parent c3d5b5f0
...@@ -254,78 +254,6 @@ Handle<String> JSListFormat::TypeAsString() const { ...@@ -254,78 +254,6 @@ Handle<String> JSListFormat::TypeAsString() const {
namespace { namespace {
MaybeHandle<JSArray> GenerateListFormatParts(
Isolate* isolate, const icu::UnicodeString& formatted,
const std::vector<icu::FieldPosition>& positions) {
Factory* factory = isolate->factory();
Handle<JSArray> array =
factory->NewJSArray(static_cast<int>(positions.size()));
int index = 0;
int prev_item_end_index = 0;
Handle<String> substring;
for (const icu::FieldPosition pos : positions) {
CHECK(pos.getBeginIndex() >= prev_item_end_index);
CHECK(pos.getField() == ULISTFMT_ELEMENT_FIELD);
if (pos.getBeginIndex() != prev_item_end_index) {
ASSIGN_RETURN_ON_EXCEPTION(
isolate, substring,
Intl::ToString(isolate, formatted, prev_item_end_index,
pos.getBeginIndex()),
JSArray);
Intl::AddElement(isolate, array, index++, factory->literal_string(),
substring);
}
ASSIGN_RETURN_ON_EXCEPTION(
isolate, substring,
Intl::ToString(isolate, formatted, pos.getBeginIndex(),
pos.getEndIndex()),
JSArray);
Intl::AddElement(isolate, array, index++, factory->element_string(),
substring);
prev_item_end_index = pos.getEndIndex();
}
if (prev_item_end_index != formatted.length()) {
ASSIGN_RETURN_ON_EXCEPTION(
isolate, substring,
Intl::ToString(isolate, formatted, prev_item_end_index,
formatted.length()),
JSArray);
Intl::AddElement(isolate, array, index++, factory->literal_string(),
substring);
}
return array;
}
// Get all the FieldPosition into a vector from FieldPositionIterator and return
// them in output order.
std::vector<icu::FieldPosition> GenerateFieldPosition(
icu::FieldPositionIterator iter) {
std::vector<icu::FieldPosition> positions;
icu::FieldPosition pos;
while (iter.next(pos)) {
// Only take the information of the ULISTFMT_ELEMENT_FIELD field.
if (pos.getField() == ULISTFMT_ELEMENT_FIELD) {
positions.push_back(pos);
}
}
// Because the format may reoder the items, ICU FieldPositionIterator
// keep the order for FieldPosition based on the order of the input items.
// But the formatToParts API in ECMA402 expects in formatted output order.
// Therefore we have to sort based on beginIndex of the FieldPosition.
// Example of such is in the "ur" (Urdu) locale with type: "unit", where the
// main text flows from right to left, the formatted list of unit should flow
// from left to right and therefore in the memory the formatted result will
// put the first item on the last in the result string according the current
// CLDR patterns.
// See 'listPattern' pattern in
// third_party/icu/source/data/locales/ur_IN.txt
std::sort(positions.begin(), positions.end(),
[](icu::FieldPosition a, icu::FieldPosition b) {
return a.getBeginIndex() < b.getBeginIndex();
});
return positions;
}
// Extract String from JSArray into array of UnicodeString // Extract String from JSArray into array of UnicodeString
Maybe<std::vector<icu::UnicodeString>> ToUnicodeStringArray( Maybe<std::vector<icu::UnicodeString>> ToUnicodeStringArray(
Isolate* isolate, Handle<JSArray> array) { Isolate* isolate, Handle<JSArray> array) {
...@@ -365,64 +293,103 @@ Maybe<std::vector<icu::UnicodeString>> ToUnicodeStringArray( ...@@ -365,64 +293,103 @@ Maybe<std::vector<icu::UnicodeString>> ToUnicodeStringArray(
return Just(result); return Just(result);
} }
} // namespace template <typename T>
MaybeHandle<T> FormatListCommon(
// ecma402 #sec-formatlist Isolate* isolate, Handle<JSListFormat> format, Handle<JSArray> list,
MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate, MaybeHandle<T> (*formatToResult)(Isolate*, const icu::FormattedList&)) {
Handle<JSListFormat> format,
Handle<JSArray> list) {
DCHECK(!list->IsUndefined()); DCHECK(!list->IsUndefined());
// ecma402 #sec-createpartsfromlist // ecma402 #sec-createpartsfromlist
// 2. If list contains any element value such that Type(value) is not String, // 2. If list contains any element value such that Type(value) is not String,
// throw a TypeError exception. // throw a TypeError exception.
Maybe<std::vector<icu::UnicodeString>> maybe_array = Maybe<std::vector<icu::UnicodeString>> maybe_array =
ToUnicodeStringArray(isolate, list); ToUnicodeStringArray(isolate, list);
MAYBE_RETURN(maybe_array, Handle<String>()); MAYBE_RETURN(maybe_array, Handle<T>());
std::vector<icu::UnicodeString> array = maybe_array.FromJust(); std::vector<icu::UnicodeString> array = maybe_array.FromJust();
icu::ListFormatter* formatter = format->icu_formatter()->raw(); icu::ListFormatter* formatter = format->icu_formatter()->raw();
CHECK_NOT_NULL(formatter); CHECK_NOT_NULL(formatter);
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
icu::UnicodeString formatted; icu::FormattedList formatted = formatter->formatStringsToValue(
formatter->format(array.data(), static_cast<int32_t>(array.size()), formatted, array.data(), static_cast<int32_t>(array.size()), status);
status); if (U_FAILURE(status)) {
DCHECK(U_SUCCESS(status)); THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), T);
}
return formatToResult(isolate, formatted);
}
return Intl::ToString(isolate, formatted); // A helper function to convert the FormattedList to a
// MaybeHandle<String> for the implementation of format.
MaybeHandle<String> FormattedToString(Isolate* isolate,
const icu::FormattedList& formatted) {
UErrorCode status = U_ZERO_ERROR;
icu::UnicodeString result = formatted.toString(status);
if (U_FAILURE(status)) {
THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
}
return Intl::ToString(isolate, result);
} }
const std::set<std::string>& JSListFormat::GetAvailableLocales() { Handle<String> IcuFieldIdToType(Isolate* isolate, int32_t field_id) {
// Since ListFormatter does not have a method to list all supported switch (field_id) {
// locales, use the one in icu::Locale per comments in case ULISTFMT_LITERAL_FIELD:
// ICU FR at https://unicode-org.atlassian.net/browse/ICU-20015 return isolate->factory()->literal_string();
return Intl::GetAvailableLocalesForLocale(); case ULISTFMT_ELEMENT_FIELD:
return isolate->factory()->element_string();
default:
UNREACHABLE();
// To prevent MSVC from issuing C4715 warning.
return Handle<String>();
}
}
// A helper function to convert the FormattedList to a
// MaybeHandle<JSArray> for the implementation of formatToParts.
MaybeHandle<JSArray> FormattedToJSArray(Isolate* isolate,
const icu::FormattedList& formatted) {
Handle<JSArray> array = isolate->factory()->NewJSArray(0);
icu::ConstrainedFieldPosition cfpos;
cfpos.constrainCategory(UFIELD_CATEGORY_LIST);
int index = 0;
UErrorCode status = U_ZERO_ERROR;
icu::UnicodeString string = formatted.toString(status);
Handle<String> substring;
while (formatted.nextPosition(cfpos, status) && U_SUCCESS(status)) {
ASSIGN_RETURN_ON_EXCEPTION(
isolate, substring,
Intl::ToString(isolate, string, cfpos.getStart(), cfpos.getLimit()),
JSArray);
Intl::AddElement(isolate, array, index++,
IcuFieldIdToType(isolate, cfpos.getField()), substring);
}
if (U_FAILURE(status)) {
THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), JSArray);
}
JSObject::ValidateElements(*array);
return array;
}
} // namespace
// ecma402 #sec-formatlist
MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate,
Handle<JSListFormat> format,
Handle<JSArray> list) {
return FormatListCommon<String>(isolate, format, list, FormattedToString);
} }
// ecma42 #sec-formatlisttoparts // ecma42 #sec-formatlisttoparts
MaybeHandle<JSArray> JSListFormat::FormatListToParts( MaybeHandle<JSArray> JSListFormat::FormatListToParts(
Isolate* isolate, Handle<JSListFormat> format, Handle<JSArray> list) { Isolate* isolate, Handle<JSListFormat> format, Handle<JSArray> list) {
DCHECK(!list->IsUndefined()); return FormatListCommon<JSArray>(isolate, format, list, FormattedToJSArray);
// ecma402 #sec-createpartsfromlist }
// 2. If list contains any element value such that Type(value) is not String,
// throw a TypeError exception.
Maybe<std::vector<icu::UnicodeString>> maybe_array =
ToUnicodeStringArray(isolate, list);
MAYBE_RETURN(maybe_array, Handle<JSArray>());
std::vector<icu::UnicodeString> array = maybe_array.FromJust();
icu::ListFormatter* formatter = format->icu_formatter()->raw();
CHECK_NOT_NULL(formatter);
UErrorCode status = U_ZERO_ERROR; const std::set<std::string>& JSListFormat::GetAvailableLocales() {
icu::UnicodeString formatted; // Since ListFormatter does not have a method to list all supported
icu::FieldPositionIterator iter; // locales, use the one in icu::Locale per comments in
formatter->format(array.data(), static_cast<int32_t>(array.size()), formatted, // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20015
&iter, status); return Intl::GetAvailableLocalesForLocale();
DCHECK(U_SUCCESS(status));
std::vector<icu::FieldPosition> field_positions = GenerateFieldPosition(iter);
return GenerateListFormatParts(isolate, formatted, field_positions);
} }
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
...@@ -106,7 +106,6 @@ class JSListFormat : public JSObject { ...@@ -106,7 +106,6 @@ class JSListFormat : public JSObject {
// Layout description. // Layout description.
#define JS_LIST_FORMAT_FIELDS(V) \ #define JS_LIST_FORMAT_FIELDS(V) \
V(kJSListFormatOffset, kTaggedSize) \
V(kLocaleOffset, kTaggedSize) \ V(kLocaleOffset, kTaggedSize) \
V(kICUFormatterOffset, kTaggedSize) \ V(kICUFormatterOffset, kTaggedSize) \
V(kFlagsOffset, kTaggedSize) \ V(kFlagsOffset, kTaggedSize) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment