Commit eea1a4c0 authored by yangguo, committed by Commit bot

[regexp] implement /ui to mirror the implementation for /i.

R=erik.corry@gmail.com, erikcorry@chromium.org

Review URL: https://codereview.chromium.org/1641613002

Cr-Commit-Position: refs/heads/master@{#33655}
parent 1f85ff07
This diff is collapsed.
...@@ -529,7 +529,7 @@ class RegExpNode: public ZoneObject { ...@@ -529,7 +529,7 @@ class RegExpNode: public ZoneObject {
// the number of nodes we are willing to look at in order to create this data. // the number of nodes we are willing to look at in order to create this data.
static const int kRecursionBudget = 200; static const int kRecursionBudget = 200;
bool KeepRecursing(RegExpCompiler* compiler); bool KeepRecursing(RegExpCompiler* compiler);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) { BoyerMooreLookahead* bm, bool not_at_start) {
UNREACHABLE(); UNREACHABLE();
} }
...@@ -537,7 +537,7 @@ class RegExpNode: public ZoneObject { ...@@ -537,7 +537,7 @@ class RegExpNode: public ZoneObject {
// If we know that the input is one-byte then there are some nodes that can // If we know that the input is one-byte then there are some nodes that can
// never match. This method returns a node that can be substituted for // never match. This method returns a node that can be substituted for
// itself, or NULL if the node can never match. // itself, or NULL if the node can never match.
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) { virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) {
return this; return this;
} }
// Helper for FilterOneByte. // Helper for FilterOneByte.
...@@ -611,15 +611,15 @@ class SeqRegExpNode: public RegExpNode { ...@@ -611,15 +611,15 @@ class SeqRegExpNode: public RegExpNode {
: RegExpNode(on_success->zone()), on_success_(on_success) { } : RegExpNode(on_success->zone()), on_success_(on_success) { }
RegExpNode* on_success() { return on_success_; } RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; } void set_on_success(RegExpNode* node) { on_success_ = node; }
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) { BoyerMooreLookahead* bm, bool not_at_start) {
on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); on_success_->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm); if (offset == 0) set_bm_info(not_at_start, bm);
} }
protected: protected:
RegExpNode* FilterSuccessor(int depth, bool ignore_case); RegExpNode* FilterSuccessor(int depth, RegExpCompiler* compiler);
private: private:
RegExpNode* on_success_; RegExpNode* on_success_;
...@@ -665,7 +665,7 @@ class ActionNode: public SeqRegExpNode { ...@@ -665,7 +665,7 @@ class ActionNode: public SeqRegExpNode {
return on_success()->GetQuickCheckDetails( return on_success()->GetQuickCheckDetails(
details, compiler, filled_in, not_at_start); details, compiler, filled_in, not_at_start);
} }
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start); BoyerMooreLookahead* bm, bool not_at_start);
ActionType action_type() { return action_type_; } ActionType action_type() { return action_type_; }
// TODO(erikcorry): We should allow some action nodes in greedy loops. // TODO(erikcorry): We should allow some action nodes in greedy loops.
...@@ -744,10 +744,10 @@ class TextNode: public SeqRegExpNode { ...@@ -744,10 +744,10 @@ class TextNode: public SeqRegExpNode {
virtual int GreedyLoopTextLength(); virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler); RegExpCompiler* compiler);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start); BoyerMooreLookahead* bm, bool not_at_start);
void CalculateOffsets(); void CalculateOffsets();
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
private: private:
enum TextEmitPassType { enum TextEmitPassType {
...@@ -803,7 +803,7 @@ class AssertionNode: public SeqRegExpNode { ...@@ -803,7 +803,7 @@ class AssertionNode: public SeqRegExpNode {
RegExpCompiler* compiler, RegExpCompiler* compiler,
int filled_in, int filled_in,
bool not_at_start); bool not_at_start);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start); BoyerMooreLookahead* bm, bool not_at_start);
AssertionType assertion_type() { return assertion_type_; } AssertionType assertion_type() { return assertion_type_; }
...@@ -841,7 +841,7 @@ class BackReferenceNode: public SeqRegExpNode { ...@@ -841,7 +841,7 @@ class BackReferenceNode: public SeqRegExpNode {
bool not_at_start) { bool not_at_start) {
return; return;
} }
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start); BoyerMooreLookahead* bm, bool not_at_start);
private: private:
...@@ -867,7 +867,7 @@ class EndNode: public RegExpNode { ...@@ -867,7 +867,7 @@ class EndNode: public RegExpNode {
// Returning 0 from EatsAtLeast should ensure we never get here. // Returning 0 from EatsAtLeast should ensure we never get here.
UNREACHABLE(); UNREACHABLE();
} }
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) { BoyerMooreLookahead* bm, bool not_at_start) {
// Returning 0 from EatsAtLeast should ensure we never get here. // Returning 0 from EatsAtLeast should ensure we never get here.
UNREACHABLE(); UNREACHABLE();
...@@ -960,7 +960,7 @@ class ChoiceNode: public RegExpNode { ...@@ -960,7 +960,7 @@ class ChoiceNode: public RegExpNode {
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in, int characters_filled_in,
bool not_at_start); bool not_at_start);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start); BoyerMooreLookahead* bm, bool not_at_start);
bool being_calculated() { return being_calculated_; } bool being_calculated() { return being_calculated_; }
...@@ -970,7 +970,7 @@ class ChoiceNode: public RegExpNode { ...@@ -970,7 +970,7 @@ class ChoiceNode: public RegExpNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return true; return true;
} }
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
virtual bool read_backward() { return false; } virtual bool read_backward() { return false; }
protected: protected:
...@@ -1028,9 +1028,9 @@ class NegativeLookaroundChoiceNode : public ChoiceNode { ...@@ -1028,9 +1028,9 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in, int characters_filled_in,
bool not_at_start); bool not_at_start);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) { BoyerMooreLookahead* bm, bool not_at_start) {
alternatives_->at(1).node()->FillInBMInfo(isolate, offset, budget - 1, bm, alternatives_->at(1).node()->FillInBMInfo(compiler, offset, budget - 1, bm,
not_at_start); not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm); if (offset == 0) set_bm_info(not_at_start, bm);
} }
...@@ -1042,7 +1042,7 @@ class NegativeLookaroundChoiceNode : public ChoiceNode { ...@@ -1042,7 +1042,7 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return !is_first; return !is_first;
} }
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
}; };
...@@ -1062,14 +1062,14 @@ class LoopChoiceNode: public ChoiceNode { ...@@ -1062,14 +1062,14 @@ class LoopChoiceNode: public ChoiceNode {
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in, int characters_filled_in,
bool not_at_start); bool not_at_start);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, virtual void FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start); BoyerMooreLookahead* bm, bool not_at_start);
RegExpNode* loop_node() { return loop_node_; } RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; } RegExpNode* continue_node() { return continue_node_; }
bool body_can_be_zero_length() { return body_can_be_zero_length_; } bool body_can_be_zero_length() { return body_can_be_zero_length_; }
virtual bool read_backward() { return read_backward_; } virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler);
private: private:
// AddAlternative is made private for loop nodes because alternatives // AddAlternative is made private for loop nodes because alternatives
......
...@@ -1294,7 +1294,10 @@ bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) { ...@@ -1294,7 +1294,10 @@ bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) { bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
#ifdef V8_I18N_SUPPORT #ifdef V8_I18N_SUPPORT
if (unicode() && ignore_case()) { if (unicode() && ignore_case() && c >= kNonBmpStart) {
// BMP characters are handled in the case-insensitive TextEmitPass.
// Surrogate code units do not have case equivalents.
// Non-BMP characters need to be desugared into two uc16 parts.
USet* set = uset_open(c, c); USet* set = uset_open(c, c);
uset_closeOver(set, USET_CASE_INSENSITIVE); uset_closeOver(set, USET_CASE_INSENSITIVE);
uset_removeAllStrings(set); uset_removeAllStrings(set);
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
namespace unibrow { namespace unibrow {
typedef unsigned int uchar; typedef int32_t uchar;
typedef unsigned char byte; typedef uint8_t byte;
/** /**
* The max length of the result of converting the case of a single * The max length of the result of converting the case of a single
...@@ -130,7 +130,7 @@ class Utf16 { ...@@ -130,7 +130,7 @@ class Utf16 {
class Utf8 { class Utf8 {
public: public:
static inline uchar Length(uchar chr, int previous); static inline unsigned Length(uchar chr, int previous);
static inline unsigned EncodeOneByte(char* out, uint8_t c); static inline unsigned EncodeOneByte(char* out, uint8_t c);
static inline unsigned Encode(char* out, static inline unsigned Encode(char* out,
uchar c, uchar c,
......
...@@ -1382,7 +1382,7 @@ TEST(IsAscii) { ...@@ -1382,7 +1382,7 @@ TEST(IsAscii) {
template<typename Op, bool return_first> template<typename Op, bool return_first>
static uint16_t ConvertLatin1(uint16_t c) { static uint16_t ConvertLatin1(uint16_t c) {
uint32_t result[Op::kMaxWidth]; uc32 result[Op::kMaxWidth];
int chars; int chars;
chars = Op::Convert(c, 0, result, NULL); chars = Op::Convert(c, 0, result, NULL);
if (chars == 0) return 0; if (chars == 0) return 0;
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment