Commit 9695ce9f authored by cira@chromium.org's avatar cira@chromium.org

Trying to re-land http://codereview.chromium.org/6901141.

Changes from previous revision:
- Made my own strncpy in I18NUtils class (we can't use OS::SNPrintF nor snprintf).
- Fixed a crashing bug related to ICU call in LanguageMatcher::BCP47ToICUFormat.

TEST=Visit i18n.kaziprst.org/locale.html
Review URL: http://codereview.chromium.org/6928017

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7796 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 9a9211e2
...@@ -45,6 +45,10 @@ ...@@ -45,6 +45,10 @@
'i18n-extension.h', 'i18n-extension.h',
'i18n-locale.cc', 'i18n-locale.cc',
'i18n-locale.h', 'i18n-locale.h',
'i18n-utils.cc',
'i18n-utils.h',
'language-matcher.cc',
'language-matcher.h',
'<(SHARED_INTERMEDIATE_DIR)/i18n-js.cc', '<(SHARED_INTERMEDIATE_DIR)/i18n-js.cc',
], ],
'include_dirs': [ 'include_dirs': [
......
...@@ -55,20 +55,6 @@ v8::Handle<v8::FunctionTemplate> I18NExtension::GetNativeFunction( ...@@ -55,20 +55,6 @@ v8::Handle<v8::FunctionTemplate> I18NExtension::GetNativeFunction(
v8::Handle<v8::String> name) { v8::Handle<v8::String> name) {
if (name->Equals(v8::String::New("NativeJSLocale"))) { if (name->Equals(v8::String::New("NativeJSLocale"))) {
return v8::FunctionTemplate::New(I18NLocale::JSLocale); return v8::FunctionTemplate::New(I18NLocale::JSLocale);
} else if (name->Equals(v8::String::New("NativeJSAvailableLocales"))) {
return v8::FunctionTemplate::New(I18NLocale::JSAvailableLocales);
} else if (name->Equals(v8::String::New("NativeJSMaximizedLocale"))) {
return v8::FunctionTemplate::New(I18NLocale::JSMaximizedLocale);
} else if (name->Equals(v8::String::New("NativeJSMinimizedLocale"))) {
return v8::FunctionTemplate::New(I18NLocale::JSMinimizedLocale);
} else if (name->Equals(v8::String::New("NativeJSDisplayLanguage"))) {
return v8::FunctionTemplate::New(I18NLocale::JSDisplayLanguage);
} else if (name->Equals(v8::String::New("NativeJSDisplayScript"))) {
return v8::FunctionTemplate::New(I18NLocale::JSDisplayScript);
} else if (name->Equals(v8::String::New("NativeJSDisplayRegion"))) {
return v8::FunctionTemplate::New(I18NLocale::JSDisplayRegion);
} else if (name->Equals(v8::String::New("NativeJSDisplayName"))) {
return v8::FunctionTemplate::New(I18NLocale::JSDisplayName);
} else if (name->Equals(v8::String::New("NativeJSBreakIterator"))) { } else if (name->Equals(v8::String::New("NativeJSBreakIterator"))) {
return v8::FunctionTemplate::New(BreakIterator::JSBreakIterator); return v8::FunctionTemplate::New(BreakIterator::JSBreakIterator);
} else if (name->Equals(v8::String::New("NativeJSCollator"))) { } else if (name->Equals(v8::String::New("NativeJSCollator"))) {
......
...@@ -27,146 +27,86 @@ ...@@ -27,146 +27,86 @@
#include "i18n-locale.h" #include "i18n-locale.h"
#include <algorithm> #include "i18n-utils.h"
#include <string> #include "language-matcher.h"
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/uloc.h" #include "unicode/uloc.h"
#include "utils.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
v8::Handle<v8::Value> I18NLocale::JSLocale(const v8::Arguments& args) { const char* const I18NLocale::kLocaleID = "localeID";
// TODO(cira): Fetch browser locale. Accept en-US as good default for now. const char* const I18NLocale::kRegionID = "regionID";
// We could possibly pass browser locale as a parameter in the constructor. const char* const I18NLocale::kICULocaleID = "icuLocaleID";
std::string locale_name("en-US");
if (args.Length() == 1 && args[0]->IsString()) {
locale_name = *v8::String::Utf8Value(args[0]->ToString());
}
v8::Local<v8::Object> locale = v8::Object::New();
locale->Set(v8::String::New("locale"), v8::String::New(locale_name.c_str()));
icu::Locale icu_locale(locale_name.c_str());
const char* language = icu_locale.getLanguage();
locale->Set(v8::String::New("language"), v8::String::New(language));
const char* script = icu_locale.getScript();
if (strlen(script)) {
locale->Set(v8::String::New("script"), v8::String::New(script));
}
const char* region = icu_locale.getCountry();
if (strlen(region)) {
locale->Set(v8::String::New("region"), v8::String::New(region));
}
return locale;
}
// TODO(cira): Filter out locales that Chrome doesn't support.
v8::Handle<v8::Value> I18NLocale::JSAvailableLocales(
const v8::Arguments& args) {
v8::Local<v8::Array> all_locales = v8::Array::New();
int count = 0;
const icu::Locale* icu_locales = icu::Locale::getAvailableLocales(count);
for (int i = 0; i < count; ++i) {
all_locales->Set(i, v8::String::New(icu_locales[i].getName()));
}
return all_locales;
}
// Use - as tag separator, not _ that ICU uses.
static std::string NormalizeLocale(const std::string& locale) {
std::string result(locale);
// TODO(cira): remove STL dependency.
std::replace(result.begin(), result.end(), '_', '-');
return result;
}
v8::Handle<v8::Value> I18NLocale::JSMaximizedLocale(const v8::Arguments& args) { v8::Handle<v8::Value> I18NLocale::JSLocale(const v8::Arguments& args) {
if (!args.Length() || !args[0]->IsString()) { v8::HandleScope handle_scope;
return v8::Undefined();
}
UErrorCode status = U_ZERO_ERROR; if (args.Length() != 1 || !args[0]->IsObject()) {
std::string locale_name = *v8::String::Utf8Value(args[0]->ToString());
char max_locale[ULOC_FULLNAME_CAPACITY];
uloc_addLikelySubtags(locale_name.c_str(), max_locale,
sizeof(max_locale), &status);
if (U_FAILURE(status)) {
return v8::Undefined(); return v8::Undefined();
} }
return v8::String::New(NormalizeLocale(max_locale).c_str()); v8::Local<v8::Object> settings = args[0]->ToObject();
}
v8::Handle<v8::Value> I18NLocale::JSMinimizedLocale(const v8::Arguments& args) { // Get best match for locale.
if (!args.Length() || !args[0]->IsString()) { v8::TryCatch try_catch;
v8::Handle<v8::Value> locale_id = settings->Get(v8::String::New(kLocaleID));
if (try_catch.HasCaught()) {
return v8::Undefined(); return v8::Undefined();
} }
UErrorCode status = U_ZERO_ERROR; LocaleIDMatch result;
std::string locale_name = *v8::String::Utf8Value(args[0]->ToString()); if (locale_id->IsArray()) {
char min_locale[ULOC_FULLNAME_CAPACITY]; LanguageMatcher::GetBestMatchForPriorityList(
uloc_minimizeSubtags(locale_name.c_str(), min_locale, v8::Handle<v8::Array>::Cast(locale_id), &result);
sizeof(min_locale), &status); } else if (locale_id->IsString()) {
if (U_FAILURE(status)) { LanguageMatcher::GetBestMatchForString(locale_id->ToString(), &result);
return v8::Undefined(); } else {
LanguageMatcher::GetBestMatchForString(v8::String::New(""), &result);
} }
return v8::String::New(NormalizeLocale(min_locale).c_str()); // Get best match for region.
} char region_id[ULOC_COUNTRY_CAPACITY];
I18NUtils::StrNCopy(region_id, ULOC_COUNTRY_CAPACITY, "");
// Common code for JSDisplayXXX methods. v8::Handle<v8::Value> region = settings->Get(v8::String::New(kRegionID));
static v8::Handle<v8::Value> GetDisplayItem(const v8::Arguments& args, if (try_catch.HasCaught()) {
const std::string& item) {
if (args.Length() != 2 || !args[0]->IsString() || !args[1]->IsString()) {
return v8::Undefined(); return v8::Undefined();
} }
std::string base_locale = *v8::String::Utf8Value(args[0]->ToString()); if (!GetBestMatchForRegionID(result.icu_id, region, region_id)) {
icu::Locale icu_locale(base_locale.c_str()); // Set region id to empty string because region couldn't be inferred.
icu::Locale display_locale = I18NUtils::StrNCopy(region_id, ULOC_COUNTRY_CAPACITY, "");
icu::Locale(*v8::String::Utf8Value(args[1]->ToString()));
icu::UnicodeString result;
if (item == "language") {
icu_locale.getDisplayLanguage(display_locale, result);
} else if (item == "script") {
icu_locale.getDisplayScript(display_locale, result);
} else if (item == "region") {
icu_locale.getDisplayCountry(display_locale, result);
} else if (item == "name") {
icu_locale.getDisplayName(display_locale, result);
} else {
return v8::Undefined();
} }
if (result.length()) { // Build JavaScript object that contains bcp and icu locale ID and region ID.
return v8::String::New( v8::Handle<v8::Object> locale = v8::Object::New();
reinterpret_cast<const uint16_t*>(result.getBuffer()), result.length()); locale->Set(v8::String::New(kLocaleID), v8::String::New(result.bcp47_id));
} locale->Set(v8::String::New(kICULocaleID), v8::String::New(result.icu_id));
locale->Set(v8::String::New(kRegionID), v8::String::New(region_id));
return v8::Undefined(); return handle_scope.Close(locale);
} }
v8::Handle<v8::Value> I18NLocale::JSDisplayLanguage(const v8::Arguments& args) { bool I18NLocale::GetBestMatchForRegionID(
return GetDisplayItem(args, "language"); const char* locale_id, v8::Handle<v8::Value> region_id, char* result) {
} if (region_id->IsString() && region_id->ToString()->Length() != 0) {
icu::Locale user_locale(
v8::Handle<v8::Value> I18NLocale::JSDisplayScript(const v8::Arguments& args) { icu::Locale("und", *v8::String::Utf8Value(region_id->ToString())));
return GetDisplayItem(args, "script"); I18NUtils::StrNCopy(
} result, ULOC_COUNTRY_CAPACITY, user_locale.getCountry());
return true;
v8::Handle<v8::Value> I18NLocale::JSDisplayRegion(const v8::Arguments& args) { }
return GetDisplayItem(args, "region"); // Maximize locale_id to infer the region (e.g. expand "de" to "de-Latn-DE"
} // and grab "DE" from the result).
UErrorCode status = U_ZERO_ERROR;
char maximized_locale[ULOC_FULLNAME_CAPACITY];
uloc_addLikelySubtags(
locale_id, maximized_locale, ULOC_FULLNAME_CAPACITY, &status);
uloc_getCountry(maximized_locale, result, ULOC_COUNTRY_CAPACITY, &status);
v8::Handle<v8::Value> I18NLocale::JSDisplayName(const v8::Arguments& args) { return !U_FAILURE(status);
return GetDisplayItem(args, "name");
} }
} } // namespace v8::internal } } // namespace v8::internal
...@@ -39,13 +39,20 @@ class I18NLocale { ...@@ -39,13 +39,20 @@ class I18NLocale {
// Implementations of window.Locale methods. // Implementations of window.Locale methods.
static v8::Handle<v8::Value> JSLocale(const v8::Arguments& args); static v8::Handle<v8::Value> JSLocale(const v8::Arguments& args);
static v8::Handle<v8::Value> JSAvailableLocales(const v8::Arguments& args);
static v8::Handle<v8::Value> JSMaximizedLocale(const v8::Arguments& args); // Infers region id given the locale id, or uses user specified region id.
static v8::Handle<v8::Value> JSMinimizedLocale(const v8::Arguments& args); // Result is canonicalized.
static v8::Handle<v8::Value> JSDisplayLanguage(const v8::Arguments& args); // Returns status of ICU operation (maximizing locale or get region call).
static v8::Handle<v8::Value> JSDisplayScript(const v8::Arguments& args); static bool GetBestMatchForRegionID(
static v8::Handle<v8::Value> JSDisplayRegion(const v8::Arguments& args); const char* locale_id, v8::Handle<v8::Value> regions, char* result);
static v8::Handle<v8::Value> JSDisplayName(const v8::Arguments& args);
private:
// Key name for localeID parameter.
static const char* const kLocaleID;
// Key name for regionID parameter.
static const char* const kRegionID;
// Key name for the icuLocaleID result.
static const char* const kICULocaleID;
}; };
} } // namespace v8::internal } } // namespace v8::internal
......
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "i18n-utils.h"
#include <string.h>
namespace v8 {
namespace internal {
// static
// Bounded string copy that always leaves |dest| NUL-terminated.
//   dest   - destination buffer; the call is a no-op when NULL.
//   length - capacity of |dest| in bytes, including room for the terminator.
//            Non-positive values are rejected: there is no byte to hold the
//            terminator, and a negative value would become a huge size_t
//            count for strncpy.
//   src    - string to copy from; the call is a no-op when NULL.
// At most (length - 1) characters of |src| end up in |dest|.
void I18NUtils::StrNCopy(char* dest, int length, const char* src) {
  if (!dest || !src || length <= 0) return;

  strncpy(dest, src, length);
  // strncpy does not terminate the destination when |src| fills the whole
  // buffer, so force the terminator into the last slot.
  dest[length - 1] = '\0';
}
} } // namespace v8::internal
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_EXTENSIONS_EXPERIMENTAL_I18N_UTILS_H_
#define V8_EXTENSIONS_EXPERIMENTAL_I18N_UTILS_H_
namespace v8 {
namespace internal {
// Collection of static helpers shared by the i18n extension sources.
class I18NUtils {
public:
// Safe string copy. Null terminates the destination. Copies at most
// (length - 1) bytes.
// |dest| is the destination buffer, |length| its capacity in bytes and
// |src| the C string to copy from. Does nothing if either pointer is NULL.
// We can't use snprintf since it's not supported on all relevant platforms.
// We can't use OS::SNPrintF, it's only for internal code.
// TODO(cira): Find a way to use OS::SNPrintF instead.
static void StrNCopy(char* dest, int length, const char* src);
private:
// Private constructor: the class only exposes the static helper above.
I18NUtils() {}
};
} } // namespace v8::internal
#endif // V8_EXTENSIONS_EXPERIMENTAL_I18N_UTILS_H_
...@@ -25,70 +25,71 @@ ...@@ -25,70 +25,71 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// TODO(cira): Remove v8 prefix from v8Locale once we have stable API. // TODO(cira): Rename v8Locale into LocaleInfo once we have stable API.
v8Locale = function(optLocale) { /**
* LocaleInfo class is an aggregate class of all i18n API calls.
* @param {Object} settings - localeID and regionID to create LocaleInfo from.
* {Array.<string>|string} settings.localeID -
* Unicode identifier of the locale.
* See http://unicode.org/reports/tr35/#BCP_47_Conformance
* {string} settings.regionID - ISO3166 region ID with addition of
* invalid, undefined and reserved region codes.
* @constructor
*/
v8Locale = function(settings) {
native function NativeJSLocale(); native function NativeJSLocale();
var properties = NativeJSLocale(optLocale);
this.locale = properties.locale;
this.language = properties.language;
this.script = properties.script;
this.region = properties.region;
};
v8Locale.availableLocales = function() {
native function NativeJSAvailableLocales();
return NativeJSAvailableLocales();
};
v8Locale.prototype.maximizedLocale = function() { // Assume user wanted to do v8Locale("sr");
native function NativeJSMaximizedLocale(); if (typeof(settings) === "string") {
return new v8Locale(NativeJSMaximizedLocale(this.locale)); settings = {'localeID': settings};
};
v8Locale.prototype.minimizedLocale = function() {
native function NativeJSMinimizedLocale();
return new v8Locale(NativeJSMinimizedLocale(this.locale));
};
v8Locale.prototype.displayLocale_ = function(displayLocale) {
var result = this.locale;
if (displayLocale !== undefined) {
result = displayLocale.locale;
} }
return result;
};
v8Locale.prototype.displayLanguage = function(optDisplayLocale) { var properties = NativeJSLocale(
var displayLocale = this.displayLocale_(optDisplayLocale); v8Locale.createSettingsOrDefault_(settings, {'localeID': 'root'}));
native function NativeJSDisplayLanguage();
return NativeJSDisplayLanguage(this.locale, displayLocale);
};
v8Locale.prototype.displayScript = function(optDisplayLocale) {
var displayLocale = this.displayLocale_(optDisplayLocale);
native function NativeJSDisplayScript();
return NativeJSDisplayScript(this.locale, displayLocale);
};
v8Locale.prototype.displayRegion = function(optDisplayLocale) { // Keep the resolved ICU locale ID around to avoid resolving localeID to
var displayLocale = this.displayLocale_(optDisplayLocale); // ICU locale ID every time BreakIterator, Collator and so forth are called.
native function NativeJSDisplayRegion(); this.__icuLocaleID__ = properties.icuLocaleID;
return NativeJSDisplayRegion(this.locale, displayLocale); this.options = {'localeID': properties.localeID,
'regionID': properties.regionID};
}; };
v8Locale.prototype.displayName = function(optDisplayLocale) { /**
var displayLocale = this.displayLocale_(optDisplayLocale); * Clones existing locale with possible overrides for some of the options.
native function NativeJSDisplayName(); * @param {!Object} settings - overrides for current locale settings.
return NativeJSDisplayName(this.locale, displayLocale); * @returns {Object} - new LocaleInfo object.
*/
v8Locale.prototype.derive = function(settings) {
return new v8Locale(
v8Locale.createSettingsOrDefault_(settings, this.options));
}; };
/**
* v8BreakIterator class implements locale aware segmentation.
* It is not part of EcmaScript proposal.
* @param {Object} locale - locale object to pass to break
* iterator implementation.
* @param {string} type - type of segmentation:
* - character
* - word
* - sentence
* - line
* @constructor
*/
v8Locale.v8BreakIterator = function(locale, type) { v8Locale.v8BreakIterator = function(locale, type) {
native function NativeJSBreakIterator(); native function NativeJSBreakIterator();
var iterator = NativeJSBreakIterator(locale, type);
locale = v8Locale.createLocaleOrDefault_(locale);
// BCP47 ID would work in this case, but we use ICU locale for consistency.
var iterator = NativeJSBreakIterator(locale.__icuLocaleID__, type);
iterator.type = type; iterator.type = type;
return iterator; return iterator;
}; };
/**
* Type of the break we encountered during previous iteration.
* @type{Enum}
*/
v8Locale.v8BreakIterator.BreakType = { v8Locale.v8BreakIterator.BreakType = {
'unknown': -1, 'unknown': -1,
'none': 0, 'none': 0,
...@@ -98,19 +99,82 @@ v8Locale.v8BreakIterator.BreakType = { ...@@ -98,19 +99,82 @@ v8Locale.v8BreakIterator.BreakType = {
'ideo': 400 'ideo': 400
}; };
/**
* Creates new v8BreakIterator based on current locale.
* @param {string} - type of segmentation. See constructor.
* @returns {Object} - new v8BreakIterator object.
*/
v8Locale.prototype.v8CreateBreakIterator = function(type) { v8Locale.prototype.v8CreateBreakIterator = function(type) {
return new v8Locale.v8BreakIterator(this.locale, type); return new v8Locale.v8BreakIterator(this, type);
}; };
// TODO(jungshik): Set |collator.options| to actually recognized / resolved // TODO(jungshik): Set |collator.options| to actually recognized / resolved
// values. // values.
v8Locale.Collator = function(locale, options) { /**
* Collator class implements locale-aware sort.
* @param {Object} locale - locale object to pass to collator implementation.
* @param {Object} settings - collation flags:
* - ignoreCase
* - ignoreAccents
* - numeric
* @constructor
*/
v8Locale.Collator = function(locale, settings) {
native function NativeJSCollator(); native function NativeJSCollator();
var collator = NativeJSCollator(locale,
options === undefined ? {} : options); locale = v8Locale.createLocaleOrDefault_(locale);
var collator = NativeJSCollator(
locale.__icuLocaleID__, v8Locale.createSettingsOrDefault_(settings, {}));
return collator; return collator;
}; };
v8Locale.prototype.createCollator = function(options) { /**
return new v8Locale.Collator(this.locale, options); * Creates new Collator based on current locale.
* @param {Object} - collation flags. See constructor.
* @returns {Object} - new Collator object.
*/
v8Locale.prototype.createCollator = function(settings) {
return new v8Locale.Collator(this, settings);
};
/**
 * Combines caller supplied settings with a set of defaults.
 * Anything that is not an object is discarded and the defaults are returned
 * as they are. Otherwise the passed-in settings object is updated in place:
 * keys it does not own are filled in from the defaults, and every string
 * value is trimmed of surrounding whitespace. Values are not validated.
 * @param {!Object} settings - user provided settings.
 * @param {!Object} defaults - default values for this type of settings.
 * @returns {Object} - valid settings object.
 */
v8Locale.createSettingsOrDefault_ = function(settings, defaults) {
  var usable = settings && typeof(settings) === 'object';
  if (!usable) {
    return defaults;
  }

  var name;

  // Fill in every default the caller did not set on the object itself.
  for (name in defaults) {
    if (settings.hasOwnProperty(name)) {
      continue;
    }
    settings[name] = defaults[name];
  }

  // Normalize values; only strings are touched (whitespace is trimmed).
  for (name in settings) {
    if (typeof(settings[name]) !== "string") {
      continue;
    }
    settings[name] = settings[name].trim();
  }

  return settings;
};
/**
* If locale is valid (defined and of v8Locale type) we return it. If not
* we create default locale and return it.
* @param {!Object} locale - user provided locale.
* @returns {Object} - v8Locale object.
*/
v8Locale.createLocaleOrDefault_ = function(locale) {
if (!locale || !(locale instanceof v8Locale)) {
return new v8Locale();
} else {
return locale;
}
}; };
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// TODO(cira): Remove LanguageMatcher from v8 when ICU implements
// language matching API.
#include "language-matcher.h"
#include "i18n-utils.h"
#include "unicode/datefmt.h" // For getAvailableLocales
#include "unicode/locid.h"
#include "unicode/uloc.h"
#include "utils.h"
namespace v8 {
namespace internal {
// Score contribution of the language subtag: added when it matches the
// candidate locale, subtracted when it does not.
const unsigned int LanguageMatcher::kLanguageWeight = 75;
// Score contribution of the script subtag (same add/subtract scheme).
const unsigned int LanguageMatcher::kScriptWeight = 20;
// Score contribution of the region subtag (same add/subtract scheme).
const unsigned int LanguageMatcher::kRegionWeight = 5;
// Minimum score a candidate has to reach to be accepted as a match.
const unsigned int LanguageMatcher::kThreshold = 50;
// Per-position bonus used when scoring entries of a priority list; earlier
// entries receive a larger multiple of it.
const unsigned int LanguageMatcher::kPositionBonus = 1;
// Locale id that a freshly constructed LocaleIDMatch holds.
const char* const LanguageMatcher::kDefaultLocale = "root";
// File-local helpers, defined further down in this file.
// Maps some unsupported language subtags to a related supported one.
static const char* GetLanguageException(const char*);
// Converts a BCP47 locale id into ICU format.
static bool BCP47ToICUFormat(const char*, char*);
// Returns 1 when two subtags are equal, -1 otherwise.
static int CompareLocaleSubtags(const char*, const char*);
// Fills in the ICU and BCP47 ids of a match.
static bool BuildLocaleName(const char*, const char*, LocaleIDMatch*);
// Creates a match in its "nothing matched yet" state: the score is -1 and
// both the BCP47 and the ICU id hold LanguageMatcher::kDefaultLocale.
LocaleIDMatch::LocaleIDMatch()
    : score(-1) {
  const char* const fallback_id = LanguageMatcher::kDefaultLocale;
  I18NUtils::StrNCopy(icu_id, ULOC_FULLNAME_CAPACITY, fallback_id);
  I18NUtils::StrNCopy(bcp47_id, ULOC_FULLNAME_CAPACITY, fallback_id);
}
// Copies both locale ids and the score from |rhs|.
// Returns a reference to this object so assignments can be chained.
LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) {
  // Self-assignment would hand the same buffer to the copy helper as both
  // source and destination, and copying between overlapping buffers is
  // undefined. There is nothing to do in that case anyway.
  if (this == &rhs) return *this;

  I18NUtils::StrNCopy(this->bcp47_id, ULOC_FULLNAME_CAPACITY, rhs.bcp47_id);
  I18NUtils::StrNCopy(this->icu_id, ULOC_FULLNAME_CAPACITY, rhs.icu_id);
  this->score = rhs.score;

  return *this;
}
// static
// Walks a JavaScript array of locale ids in priority order and stores the
// best scoring supported locale in |result|.
//   locales - array of candidate locale ids; non-string items are skipped.
//   result  - receives the best match. It is left untouched when no item
//             produces an acceptable match.
// Each item's score is increased by a position bonus that shrinks towards the
// end of the list, so earlier items win ties.
void LanguageMatcher::GetBestMatchForPriorityList(
    v8::Handle<v8::Array> locales, LocaleIDMatch* result) {
  v8::HandleScope handle_scope;

  unsigned int position_bonus = locales->Length() * kPositionBonus;

  int max_score = 0;
  for (unsigned int i = 0; i < locales->Length(); ++i) {
    position_bonus -= kPositionBonus;

    v8::TryCatch try_catch;
    v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i));

    // Stop scanning if reading the item raised an exception; whatever has
    // been stored in |result| so far is kept.
    if (try_catch.HasCaught()) break;

    // JavaScript arrays can be heterogeneous so check each item
    // if it's a string.
    if (!locale_id->IsString()) continue;

    // Use a fresh match for every item. CompareToSupportedLocaleIDList only
    // accepts candidates that beat the score already stored in the match, so
    // reusing one object would make later items compete against the
    // position-boosted score of an earlier item.
    LocaleIDMatch match;
    if (!CompareToSupportedLocaleIDList(locale_id->ToString(), &match)) {
      continue;
    }

    // Skip items under threshold. Compare as signed values; |score| is an int.
    if (match.score < static_cast<int>(kThreshold)) continue;

    match.score += static_cast<int>(position_bonus);
    if (match.score > max_score) {
      *result = match;

      max_score = match.score;
    }
  }
}
// static
// Looks up the best supported locale for a single locale id.
// |result| is overwritten only when a supported locale was found and its
// score reaches kThreshold; otherwise it keeps the value the caller put in.
void LanguageMatcher::GetBestMatchForString(
    v8::Handle<v8::String> locale, LocaleIDMatch* result) {
  LocaleIDMatch candidate;

  if (!CompareToSupportedLocaleIDList(locale, &candidate)) return;
  if (candidate.score < kThreshold) return;

  *result = candidate;
}
// static
// Scores |locale_id| against every locale ICU reports as available and
// records the best candidate in |result|.
//   locale_id - BCP47 locale id supplied by the user.
//   result    - in: its current score is the bar a candidate has to beat.
//               out: score and locale ids of the best candidate, if any.
// Returns false when the id is not ASCII, cannot be converted to ICU format,
// no candidate reaches the threshold, or the resulting locale name cannot be
// built.
bool LanguageMatcher::CompareToSupportedLocaleIDList(
    v8::Handle<v8::String> locale_id, LocaleIDMatch* result) {
  static int32_t available_count = 0;
  // Depending on how ICU data is built, locales returned by
  // Locale::getAvailableLocale() are not guaranteed to support DateFormat,
  // Collation and other services. We can call getAvailableLocale() of all the
  // services we want to support and take the intersection of them all, but
  // using DateFormat::getAvailableLocales() should suffice.
  // TODO(cira): Maybe make this thread-safe?
  static const icu::Locale* available_locales =
      icu::DateFormat::getAvailableLocales(available_count);

  // Skip this locale_id if it's not in ASCII.
  v8::String::AsciiValue ascii_value(locale_id);
  if (*ascii_value == NULL) return false;

  char locale[ULOC_FULLNAME_CAPACITY];
  if (!BCP47ToICUFormat(*ascii_value, locale)) return false;

  icu::Locale input_locale(locale);

  // Position of the best match locale in list of available locales.
  int position = -1;
  const char* language = GetLanguageException(input_locale.getLanguage());
  const char* script = input_locale.getScript();
  const char* region = input_locale.getCountry();

  // The weights are declared unsigned, but scores can go negative (a
  // mismatch subtracts the weight). Do all the arithmetic and comparisons on
  // signed values so a negative score can never pass the threshold test by
  // being reinterpreted as a large unsigned number.
  const int language_weight = static_cast<int>(kLanguageWeight);
  const int script_weight = static_cast<int>(kScriptWeight);
  const int region_weight = static_cast<int>(kRegionWeight);
  const int threshold = static_cast<int>(kThreshold);

  for (int32_t i = 0; i < available_count; ++i) {
    int current_score = 0;
    int sign =
        CompareLocaleSubtags(language, available_locales[i].getLanguage());
    current_score += sign * language_weight;

    sign = CompareLocaleSubtags(script, available_locales[i].getScript());
    current_score += sign * script_weight;

    sign = CompareLocaleSubtags(region, available_locales[i].getCountry());
    current_score += sign * region_weight;

    if (current_score >= threshold && current_score > result->score) {
      result->score = current_score;
      position = i;
    }
  }

  // Didn't find any good matches so use defaults.
  if (position == -1) return false;

  return BuildLocaleName(available_locales[position].getBaseName(),
                         input_locale.getName(), result);
}
// Some language subtags are not supported directly but have a closely related
// language that is; falling back to that relative is better than falling back
// to the default locale.
// Returns the replacement subtag for a known exception, or |language| itself
// (the same pointer) when no exception applies.
static const char* GetLanguageException(const char* language) {
  static const struct {
    const char* unsupported;
    const char* replacement;
  } kExceptions[] = {
    {"sh", "sr"},   // Serbo-Croatian to Serbian.
    {"no", "nb"},   // Norwegian to Norwegian Bokmal.
    {"mo", "ro"},   // Moldavian to Romanian.
    {"tl", "fil"},  // Tagalog to Filipino.
  };
  const int exception_count =
      static_cast<int>(sizeof(kExceptions) / sizeof(kExceptions[0]));

  for (int i = 0; i < exception_count; ++i) {
    if (strcmp(language, kExceptions[i].unsupported) == 0) {
      return kExceptions[i].replacement;
    }
  }

  return language;
}
// Converts user input from BCP47 locale id format to ICU compatible format.
// Returns false if uloc_forLanguageTag call fails or if extension is too long.
static bool BCP47ToICUFormat(const char* locale_id, char* result) {
UErrorCode status = U_ZERO_ERROR;
int32_t locale_size = 0;
char locale[ULOC_FULLNAME_CAPACITY];
I18NUtils::StrNCopy(locale, ULOC_FULLNAME_CAPACITY, locale_id);
// uloc_forLanguageTag has a bug where long extension can crash the code.
// We need to check if extension part of language id conforms to the length.
// ICU bug: http://bugs.icu-project.org/trac/ticket/8519
const char* extension = strstr(locale_id, "-u-");
if (extension != NULL &&
strlen(extension) > ULOC_KEYWORD_AND_VALUES_CAPACITY) {
// Truncate to get non-crashing string, but still preserve base language.
int base_length = strlen(locale_id) - strlen(extension);
locale[base_length] = '\0';
}
uloc_forLanguageTag(locale, result, ULOC_FULLNAME_CAPACITY,
&locale_size, &status);
return !U_FAILURE(status);
}
// Checks two locale id subtags for exact (case-sensitive) equality.
// Returns 1 when they are identical and -1 when they differ, so the result
// can be used directly as the sign of a score contribution.
static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) {
  if (strcmp(lsubtag, rsubtag) != 0) {
    return -1;
  }
  return 1;
}
// Builds a BCP47 compliant locale id from base name of matched locale and
// full user specified locale.
// Returns false if uloc_toLanguageTag failed to convert locale id.
// Example:
// base_name of matched locale (ICU ID): de_DE
// input_locale_name (ICU ID): de_AT@collation=phonebk
// result (ICU ID): de_DE@collation=phonebk
// result (BCP47 ID): de-DE-u-co-phonebk
static bool BuildLocaleName(const char* base_name,
const char* input_locale_name,
LocaleIDMatch* result) {
I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name);
// Get extensions (if any) from the original locale.
const char* extension = strchr(input_locale_name, ULOC_KEYWORD_SEPARATOR);
if (extension != NULL) {
I18NUtils::StrNCopy(result->icu_id + strlen(base_name),
ULOC_KEYWORD_AND_VALUES_CAPACITY, extension);
} else {
I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name);
}
// Convert ICU locale name into BCP47 format.
UErrorCode status = U_ZERO_ERROR;
uloc_toLanguageTag(result->icu_id, result->bcp47_id,
ULOC_FULLNAME_CAPACITY, false, &status);
return !U_FAILURE(status);
}
} } // namespace v8::internal
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_EXTENSIONS_EXPERIMENTAL_LANGUAGE_MATCHER_H_
#define V8_EXTENSIONS_EXPERIMENTAL_LANGUAGE_MATCHER_H_
#include <v8.h>
#include "unicode/uloc.h"
namespace v8 {
namespace internal {
// Result of matching a user supplied locale id against the supported locales:
// the matched locale in both BCP47 and ICU form, plus its score.
struct LocaleIDMatch {
// Initializes the match to its default state.
LocaleIDMatch();
// Copies both ids and the score from |rhs|.
LocaleIDMatch& operator=(const LocaleIDMatch& rhs);
// Bcp47 locale id - "de-Latn-DE-u-co-phonebk".
char bcp47_id[ULOC_FULLNAME_CAPACITY];
// ICU locale id - "de_Latn_DE@collation=phonebk".
char icu_id[ULOC_FULLNAME_CAPACITY];
// Score for this locale.
int score;
};
// Static-only helper that picks the best supported locale for user supplied
// locale identifiers.
class LanguageMatcher {
public:
// Default locale.
static const char* const kDefaultLocale;
// Finds best supported locale for a given list of locale identifiers.
// It preserves the extension for the locale id.
// The best match, if any, is written to |result|.
static void GetBestMatchForPriorityList(
v8::Handle<v8::Array> locale_list, LocaleIDMatch* result);
// Finds best supported locale for a single locale identifier.
// It preserves the extension for the locale id.
// The match, if any, is written to |result|.
static void GetBestMatchForString(
v8::Handle<v8::String> locale_id, LocaleIDMatch* result);
private:
// If language subtags match add this amount to the score.
static const unsigned int kLanguageWeight;
// If script subtags match add this amount to the score.
static const unsigned int kScriptWeight;
// If region subtags match add this amount to the score.
static const unsigned int kRegionWeight;
// LocaleID match score has to be at least this number to accept the match.
static const unsigned int kThreshold;
// For breaking ties in priority queue.
static const unsigned int kPositionBonus;
// Private constructor declaration; the public interface is static only.
LanguageMatcher();
// Compares locale_id to the supported list of locales and returns best
// match.
// Returns false if locale_id is not ASCII, cannot be converted from BCP47 to
// ICU format, no supported locale scores high enough, or the matched locale
// cannot be converted from ICU back to BCP47 format.
static bool CompareToSupportedLocaleIDList(v8::Handle<v8::String> locale_id,
LocaleIDMatch* result);
};
} } // namespace v8::internal
#endif // V8_EXTENSIONS_EXPERIMENTAL_LANGUAGE_MATCHER_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment