Commit d77d6577 authored by yangguo, committed by Commit bot

Revert of [regexp] implement /ui to mirror the implementation for /i....

Revert of [regexp] implement /ui to mirror the implementation for /i. (patchset #2 id:20001 of https://codereview.chromium.org/1641613002/ )

Reason for revert:
This test fails:

assertEquals(["as"], /^a[\u017F]/ui.exec("as"));

The reason is that we end up with a character class that is not standalone, so case folding is not performed on it correctly when the unicode flag is set.

Original issue's description:
> [regexp] implement /ui to mirror the implementation for /i.
>
> R=erik.corry@gmail.com, erikcorry@chromium.org
>
> Committed: https://crrev.com/eea1a4c003c559c99bcc9f08aa7eadf931975aad
> Cr-Commit-Position: refs/heads/master@{#33655}

TBR=erik.corry@gmail.com,erikcorry@chromium.org,erikcorry@google.com
# Skipping CQ checks because original CL landed less than 1 day ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true

Review URL: https://codereview.chromium.org/1661483002

Cr-Commit-Position: refs/heads/master@{#33676}
parent 6b2001b6
This diff is collapsed.
......@@ -529,7 +529,7 @@ class RegExpNode: public ZoneObject {
// the number of nodes we are willing to look at in order to create this data.
static const int kRecursionBudget = 200;
bool KeepRecursing(RegExpCompiler* compiler);
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
UNREACHABLE();
}
......@@ -537,7 +537,7 @@ class RegExpNode: public ZoneObject {
// If we know that the input is one-byte then there are some nodes that can
// never match. This method returns a node that can be substituted for
// itself, or NULL if the node can never match.
virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) {
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) {
return this;
}
// Helper for FilterOneByte.
......@@ -611,15 +611,15 @@ class SeqRegExpNode: public RegExpNode {
: RegExpNode(on_success->zone()), on_success_(on_success) { }
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
on_success_->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start);
on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm);
}
protected:
RegExpNode* FilterSuccessor(int depth, RegExpCompiler* compiler);
RegExpNode* FilterSuccessor(int depth, bool ignore_case);
private:
RegExpNode* on_success_;
......@@ -665,7 +665,7 @@ class ActionNode: public SeqRegExpNode {
return on_success()->GetQuickCheckDetails(
details, compiler, filled_in, not_at_start);
}
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start);
ActionType action_type() { return action_type_; }
// TODO(erikcorry): We should allow some action nodes in greedy loops.
......@@ -744,10 +744,10 @@ class TextNode: public SeqRegExpNode {
virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler);
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start);
void CalculateOffsets();
virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
private:
enum TextEmitPassType {
......@@ -803,7 +803,7 @@ class AssertionNode: public SeqRegExpNode {
RegExpCompiler* compiler,
int filled_in,
bool not_at_start);
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start);
AssertionType assertion_type() { return assertion_type_; }
......@@ -841,7 +841,7 @@ class BackReferenceNode: public SeqRegExpNode {
bool not_at_start) {
return;
}
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start);
private:
......@@ -867,7 +867,7 @@ class EndNode: public RegExpNode {
// Returning 0 from EatsAtLeast should ensure we never get here.
UNREACHABLE();
}
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
// Returning 0 from EatsAtLeast should ensure we never get here.
UNREACHABLE();
......@@ -960,7 +960,7 @@ class ChoiceNode: public RegExpNode {
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start);
bool being_calculated() { return being_calculated_; }
......@@ -970,7 +970,7 @@ class ChoiceNode: public RegExpNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return true;
}
virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
virtual bool read_backward() { return false; }
protected:
......@@ -1028,9 +1028,9 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
alternatives_->at(1).node()->FillInBMInfo(compiler, offset, budget - 1, bm,
alternatives_->at(1).node()->FillInBMInfo(isolate, offset, budget - 1, bm,
not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm);
}
......@@ -1042,7 +1042,7 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return !is_first;
}
virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
};
......@@ -1062,14 +1062,14 @@ class LoopChoiceNode: public ChoiceNode {
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start);
RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; }
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
private:
// AddAlternative is made private for loop nodes because alternatives
......
......@@ -1294,10 +1294,7 @@ bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
#ifdef V8_I18N_SUPPORT
if (unicode() && ignore_case() && c >= kNonBmpStart) {
// BMP characters are handled in the case-insensitive TextEmitPass.
// Surrogate code units do not have case equivalents.
// Non-BMP characters need to be desugared into two uc16 parts.
if (unicode() && ignore_case()) {
USet* set = uset_open(c, c);
uset_closeOver(set, USET_CASE_INSENSITIVE);
uset_removeAllStrings(set);
......
......@@ -15,8 +15,8 @@
namespace unibrow {
typedef int32_t uchar;
typedef uint8_t byte;
typedef unsigned int uchar;
typedef unsigned char byte;
/**
* The max length of the result of converting the case of a single
......@@ -130,7 +130,7 @@ class Utf16 {
class Utf8 {
public:
static inline unsigned Length(uchar chr, int previous);
static inline uchar Length(uchar chr, int previous);
static inline unsigned EncodeOneByte(char* out, uint8_t c);
static inline unsigned Encode(char* out,
uchar c,
......
......@@ -1382,7 +1382,7 @@ TEST(IsAscii) {
template<typename Op, bool return_first>
static uint16_t ConvertLatin1(uint16_t c) {
uc32 result[Op::kMaxWidth];
uint32_t result[Op::kMaxWidth];
int chars;
chars = Op::Convert(c, 0, result, NULL);
if (chars == 0) return 0;
......
......@@ -50,3 +50,10 @@ assertTrue(/\u1f6b/ui.test("\u1f63"));
// Back references.
assertNull(/(.)\1\1/ui.exec("\u00e5\u212b\u00c5"));
assertNull(/(.)\1/ui.exec("\u{118aa}\u{118ca}"));
// Non-Latin1 maps to Latin1.
assertNull(/^\u017F/ui.exec("s"));
assertNull(/^\u017F/ui.exec("s\u1234"));
assertNull(/^a[\u017F]/ui.exec("as"));
assertNull(/^a[\u017F]/ui.exec("as\u1234"));
......@@ -56,3 +56,9 @@ assertEquals(["\u{118aa}\u{118ca}", "\u{118aa}"],
// Misc.
assertTrue(/\u00e5\u00e5\u00e5/ui.test("\u212b\u00e5\u00c5"));
assertTrue(/AB\u{10400}/ui.test("ab\u{10428}"));
// Non-Latin1 maps to Latin1.
assertEquals(["s"], /^\u017F/ui.exec("s"));
assertEquals(["s"], /^\u017F/ui.exec("s\u1234"));
assertEquals(["as"], /^a[\u017F]/ui.exec("as"));
assertEquals(["as"], /^a[\u017F]/ui.exec("as\u1234"));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment