Commit 8c4988f7, authored by yangguo, committed by Commit bot

[regexp] use C++ API for unicode set over C API.

R=jshin@chromium.org

Review-Url: https://codereview.chromium.org/2514333002
Cr-Commit-Position: refs/heads/master@{#41164}
parent 5d4253ec
......@@ -27,7 +27,7 @@
#include "src/unicode-decoder.h"
#ifdef V8_I18N_SUPPORT
#include "unicode/uset.h"
#include "unicode/uniset.h"
#include "unicode/utypes.h"
#endif // V8_I18N_SUPPORT
......@@ -5114,30 +5114,22 @@ void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
// Use ICU to compute the case fold closure over the ranges.
DCHECK(compiler->unicode());
DCHECK(compiler->ignore_case());
USet* set = uset_openEmpty();
icu::UnicodeSet set;
for (int i = 0; i < ranges->length(); i++) {
uset_addRange(set, ranges->at(i).from(), ranges->at(i).to());
set.add(ranges->at(i).from(), ranges->at(i).to());
}
ranges->Clear();
uset_closeOver(set, USET_CASE_INSENSITIVE);
set.closeOver(USET_CASE_INSENSITIVE);
// Full case mappings map single characters to multiple characters.
// Those are represented as strings in the set. Remove them so that
// we end up with only simple and common case mappings.
uset_removeAllStrings(set);
int item_count = uset_getItemCount(set);
int item_result = 0;
UErrorCode ec = U_ZERO_ERROR;
set.removeAllStrings();
Zone* zone = compiler->zone();
for (int i = 0; i < item_count; i++) {
uc32 start = 0;
uc32 end = 0;
item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
ranges->Add(CharacterRange::Range(start, end), zone);
for (int i = 0; i < set.getRangeCount(); i++) {
ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
zone);
}
// No errors and everything we collected have been ranges.
DCHECK_EQ(U_ZERO_ERROR, ec);
DCHECK_EQ(0, item_result);
uset_close(set);
#else
// Fallback if ICU is not included.
CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(),
......
......@@ -13,7 +13,7 @@
#include "src/utils.h"
#ifdef V8_I18N_SUPPORT
#include "unicode/uset.h"
#include "unicode/uniset.h"
#endif // V8_I18N_SUPPORT
namespace v8 {
......@@ -1100,26 +1100,20 @@ bool LookupPropertyValueName(UProperty property,
return false;
}
USet* set = uset_openEmpty();
UErrorCode ec = U_ZERO_ERROR;
uset_applyIntPropertyValue(set, property, property_value, &ec);
bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set);
icu::UnicodeSet set;
set.applyIntPropertyValue(property, property_value, ec);
bool success = ec == U_ZERO_ERROR && !set.isEmpty();
if (success) {
uset_removeAllStrings(set);
if (negate) uset_complement(set);
int item_count = uset_getItemCount(set);
int item_result = 0;
for (int i = 0; i < item_count; i++) {
uc32 start = 0;
uc32 end = 0;
item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
result->Add(CharacterRange::Range(start, end), zone);
set.removeAllStrings();
if (negate) set.complement();
for (int i = 0; i < set.getRangeCount(); i++) {
result->Add(
CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
zone);
}
DCHECK_EQ(U_ZERO_ERROR, ec);
DCHECK_EQ(0, item_result);
}
uset_close(set);
return success;
}
......@@ -1732,12 +1726,10 @@ bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
#ifdef V8_I18N_SUPPORT
if (unicode() && ignore_case()) {
USet* set = uset_open(c, c);
uset_closeOver(set, USET_CASE_INSENSITIVE);
uset_removeAllStrings(set);
bool result = uset_size(set) > 1;
uset_close(set);
return result;
icu::UnicodeSet set(c, c);
set.closeOver(USET_CASE_INSENSITIVE);
set.removeAllStrings();
return set.size() > 1;
}
// In the case where ICU is not included, we act as if the unicode flag is
// not set, and do not desugar.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment