Commit 8c4988f7 authored by yangguo, committed by Commit bot

[regexp] use C++ API for unicode set over C API.

R=jshin@chromium.org

Review-Url: https://codereview.chromium.org/2514333002
Cr-Commit-Position: refs/heads/master@{#41164}
parent 5d4253ec
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
#include "src/unicode-decoder.h" #include "src/unicode-decoder.h"
#ifdef V8_I18N_SUPPORT #ifdef V8_I18N_SUPPORT
#include "unicode/uset.h" #include "unicode/uniset.h"
#include "unicode/utypes.h" #include "unicode/utypes.h"
#endif // V8_I18N_SUPPORT #endif // V8_I18N_SUPPORT
...@@ -5114,30 +5114,22 @@ void AddUnicodeCaseEquivalents(RegExpCompiler* compiler, ...@@ -5114,30 +5114,22 @@ void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
// Use ICU to compute the case fold closure over the ranges. // Use ICU to compute the case fold closure over the ranges.
DCHECK(compiler->unicode()); DCHECK(compiler->unicode());
DCHECK(compiler->ignore_case()); DCHECK(compiler->ignore_case());
USet* set = uset_openEmpty(); icu::UnicodeSet set;
for (int i = 0; i < ranges->length(); i++) { for (int i = 0; i < ranges->length(); i++) {
uset_addRange(set, ranges->at(i).from(), ranges->at(i).to()); set.add(ranges->at(i).from(), ranges->at(i).to());
} }
ranges->Clear(); ranges->Clear();
uset_closeOver(set, USET_CASE_INSENSITIVE); set.closeOver(USET_CASE_INSENSITIVE);
// Full case mapping map single characters to multiple characters. // Full case mapping map single characters to multiple characters.
// Those are represented as strings in the set. Remove them so that // Those are represented as strings in the set. Remove them so that
// we end up with only simple and common case mappings. // we end up with only simple and common case mappings.
uset_removeAllStrings(set); set.removeAllStrings();
int item_count = uset_getItemCount(set);
int item_result = 0;
UErrorCode ec = U_ZERO_ERROR;
Zone* zone = compiler->zone(); Zone* zone = compiler->zone();
for (int i = 0; i < item_count; i++) { for (int i = 0; i < set.getRangeCount(); i++) {
uc32 start = 0; ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
uc32 end = 0; zone);
item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
ranges->Add(CharacterRange::Range(start, end), zone);
} }
// No errors and everything we collected have been ranges. // No errors and everything we collected have been ranges.
DCHECK_EQ(U_ZERO_ERROR, ec);
DCHECK_EQ(0, item_result);
uset_close(set);
#else #else
// Fallback if ICU is not included. // Fallback if ICU is not included.
CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(), CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(),
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "src/utils.h" #include "src/utils.h"
#ifdef V8_I18N_SUPPORT #ifdef V8_I18N_SUPPORT
#include "unicode/uset.h" #include "unicode/uniset.h"
#endif // V8_I18N_SUPPORT #endif // V8_I18N_SUPPORT
namespace v8 { namespace v8 {
...@@ -1100,26 +1100,20 @@ bool LookupPropertyValueName(UProperty property, ...@@ -1100,26 +1100,20 @@ bool LookupPropertyValueName(UProperty property,
return false; return false;
} }
USet* set = uset_openEmpty();
UErrorCode ec = U_ZERO_ERROR; UErrorCode ec = U_ZERO_ERROR;
uset_applyIntPropertyValue(set, property, property_value, &ec); icu::UnicodeSet set;
bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); set.applyIntPropertyValue(property, property_value, ec);
bool success = ec == U_ZERO_ERROR && !set.isEmpty();
if (success) { if (success) {
uset_removeAllStrings(set); set.removeAllStrings();
if (negate) uset_complement(set); if (negate) set.complement();
int item_count = uset_getItemCount(set); for (int i = 0; i < set.getRangeCount(); i++) {
int item_result = 0; result->Add(
for (int i = 0; i < item_count; i++) { CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
uc32 start = 0; zone);
uc32 end = 0;
item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
result->Add(CharacterRange::Range(start, end), zone);
} }
DCHECK_EQ(U_ZERO_ERROR, ec);
DCHECK_EQ(0, item_result);
} }
uset_close(set);
return success; return success;
} }
...@@ -1732,12 +1726,10 @@ bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) { ...@@ -1732,12 +1726,10 @@ bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) { bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
#ifdef V8_I18N_SUPPORT #ifdef V8_I18N_SUPPORT
if (unicode() && ignore_case()) { if (unicode() && ignore_case()) {
USet* set = uset_open(c, c); icu::UnicodeSet set(c, c);
uset_closeOver(set, USET_CASE_INSENSITIVE); set.closeOver(USET_CASE_INSENSITIVE);
uset_removeAllStrings(set); set.removeAllStrings();
bool result = uset_size(set) > 1; return set.size() > 1;
uset_close(set);
return result;
} }
// In the case where ICU is not included, we act as if the unicode flag is // In the case where ICU is not included, we act as if the unicode flag is
// not set, and do not desugar. // not set, and do not desugar.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment