Commit 35eec7c7 authored by erik.corry@gmail.com

Reland sticky regexps https://codereview.chromium.org/567313003/

R=svenpanne@chromium.org, yangguo@chromium.org
BUG=

Review URL: https://codereview.chromium.org/580383003

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@24065 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent bea452f2
......@@ -203,6 +203,7 @@ class Genesis BASE_EMBEDDED {
// New context initialization. Used for creating a context from scratch.
void InitializeGlobal(Handle<GlobalObject> global_object,
Handle<JSFunction> empty_function);
void InitializeExperimentalGlobal();
// Installs the contents of the native .js files on the global objects.
// Used for creating a context from scratch.
void InstallNativeFunctions();
......@@ -1351,6 +1352,20 @@ void Genesis::InstallTypedArray(
}
void Genesis::InitializeExperimentalGlobal() {
// TODO(erikcorry): Move this into Genesis::InitializeGlobal once we no
// longer need to live behind a flag.
Handle<JSObject> builtins(native_context()->builtins());
Handle<HeapObject> flag(
FLAG_harmony_regexps ? heap()->true_value() : heap()->false_value());
PropertyAttributes attributes =
static_cast<PropertyAttributes>(DONT_DELETE | READ_ONLY);
Runtime::DefineObjectProperty(builtins, factory()->harmony_regexps_string(),
flag, attributes).Assert();
}
bool Genesis::CompileBuiltin(Isolate* isolate, int index) {
Vector<const char> name = Natives::GetScriptName(index);
Handle<String> source_code =
......@@ -2651,6 +2666,7 @@ Genesis::Genesis(Isolate* isolate,
// Install experimental natives.
if (!InstallExperimentalNatives()) return;
InitializeExperimentalGlobal();
// We can't (de-)serialize typed arrays currently, but we are lucky: The state
// of the random number generator needs no initialization during snapshot
......
......@@ -344,7 +344,7 @@ MaybeHandle<SharedFunctionInfo> CompilationCache::LookupEval(
MaybeHandle<FixedArray> CompilationCache::LookupRegExp(Handle<String> source,
JSRegExp::Flags flags) {
JSRegExp::Flags flags) {
if (!IsEnabled()) return MaybeHandle<FixedArray>();
return reg_exp_.Lookup(source, flags);
......
......@@ -162,6 +162,7 @@ DEFINE_BOOL(harmony_arrow_functions, false, "enable harmony arrow functions")
DEFINE_BOOL(harmony_classes, false, "enable harmony classes")
DEFINE_BOOL(harmony_object_literals, false,
"enable harmony object literal extensions")
DEFINE_BOOL(harmony_regexps, false, "enable regexp-related harmony features")
DEFINE_BOOL(harmony, false, "enable all harmony features (except proxies)")
DEFINE_IMPLICATION(harmony, harmony_scoping)
......@@ -174,6 +175,7 @@ DEFINE_IMPLICATION(harmony, harmony_arrays)
DEFINE_IMPLICATION(harmony, harmony_arrow_functions)
DEFINE_IMPLICATION(harmony, harmony_classes)
DEFINE_IMPLICATION(harmony, harmony_object_literals)
DEFINE_IMPLICATION(harmony, harmony_regexps)
DEFINE_IMPLICATION(harmony_modules, harmony_scoping)
DEFINE_IMPLICATION(harmony_classes, harmony_scoping)
DEFINE_IMPLICATION(harmony_classes, harmony_object_literals)
......
......@@ -287,6 +287,8 @@ namespace internal {
V(global_string, "global") \
V(ignore_case_string, "ignoreCase") \
V(multiline_string, "multiline") \
V(sticky_string, "sticky") \
V(harmony_regexps_string, "harmony_regexps") \
V(input_string, "input") \
V(index_string, "index") \
V(last_index_string, "lastIndex") \
......
......@@ -70,6 +70,9 @@ static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
case 'm':
flags |= JSRegExp::MULTILINE;
break;
case 'y':
if (FLAG_harmony_regexps) flags |= JSRegExp::STICKY;
break;
}
}
return JSRegExp::Flags(flags);
......@@ -185,12 +188,14 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
if (parse_result.simple &&
!flags.is_ignore_case() &&
!flags.is_sticky() &&
!HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern.
AtomCompile(re, pattern, flags, pattern);
has_been_compiled = true;
} else if (parse_result.tree->IsAtom() &&
!flags.is_ignore_case() &&
!flags.is_sticky() &&
parse_result.capture_count == 0) {
RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data();
......@@ -430,7 +435,8 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
}
RegExpEngine::CompilationResult result = RegExpEngine::Compile(
&compile_data, flags.is_ignore_case(), flags.is_global(),
flags.is_multiline(), pattern, sample_subject, is_one_byte, &zone);
flags.is_multiline(), flags.is_sticky(), pattern, sample_subject,
is_one_byte, &zone);
if (result.error_message != NULL) {
// Unable to compile regexp.
Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
......@@ -6027,8 +6033,8 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) {
RegExpEngine::CompilationResult RegExpEngine::Compile(
RegExpCompileData* data, bool ignore_case, bool is_global,
bool is_multiline, Handle<String> pattern, Handle<String> sample_subject,
bool is_one_byte, Zone* zone) {
bool is_multiline, bool is_sticky, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte, Zone* zone) {
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
return IrregexpRegExpTooBig(zone->isolate());
}
......@@ -6055,9 +6061,9 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
bool is_end_anchored = data->tree->IsAnchoredAtEnd();
bool is_start_anchored = data->tree->IsAnchoredAtStart();
int max_length = data->tree->max_match();
if (!is_start_anchored) {
if (!is_start_anchored && !is_sticky) {
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning.
// this expression is anchored at the beginning or sticky.
RegExpNode* loop_node =
RegExpQuantifier::ToNode(0,
RegExpTree::kInfinity,
......
......@@ -1661,7 +1661,7 @@ class RegExpEngine: public AllStatic {
};
static CompilationResult Compile(RegExpCompileData* input, bool ignore_case,
bool global, bool multiline,
bool global, bool multiline, bool sticky,
Handle<String> pattern,
Handle<String> sample_subject,
bool is_one_byte, Zone* zone);
......
......@@ -8034,7 +8034,13 @@ class JSRegExp: public JSObject {
// IRREGEXP: Compiled with Irregexp.
// IRREGEXP_NATIVE: Compiled to native code with Irregexp.
enum Type { NOT_COMPILED, ATOM, IRREGEXP };
enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };
// Modifier flags of a regexp. Every flag occupies its own bit so that any
// combination can be stored in a single integer and tested with a mask.
enum Flag {
  NONE = 0,
  GLOBAL = 1 << 0,       // 'g'
  IGNORE_CASE = 1 << 1,  // 'i'
  MULTILINE = 1 << 2,    // 'm'
  STICKY = 1 << 3        // 'y'
};
class Flags {
public:
......@@ -8042,6 +8048,7 @@ class JSRegExp: public JSObject {
bool is_global() { return (value_ & GLOBAL) != 0; }
bool is_ignore_case() { return (value_ & IGNORE_CASE) != 0; }
bool is_multiline() { return (value_ & MULTILINE) != 0; }
bool is_sticky() { return (value_ & STICKY) != 0; }
uint32_t value() { return value_; }
private:
uint32_t value_;
......
......@@ -22,6 +22,8 @@ function DoConstructRegExp(object, pattern, flags) {
flags = (pattern.global ? 'g' : '')
+ (pattern.ignoreCase ? 'i' : '')
+ (pattern.multiline ? 'm' : '');
if (harmony_regexps)
flags += (pattern.sticky ? 'y' : '');
pattern = pattern.source;
}
......@@ -31,6 +33,7 @@ function DoConstructRegExp(object, pattern, flags) {
var global = false;
var ignoreCase = false;
var multiline = false;
var sticky = false;
for (var i = 0; i < flags.length; i++) {
var c = %_CallFunction(flags, i, StringCharAt);
switch (c) {
......@@ -52,12 +55,18 @@ function DoConstructRegExp(object, pattern, flags) {
}
multiline = true;
break;
case 'y':
if (!harmony_regexps || sticky) {
throw MakeSyntaxError("invalid_regexp_flags", [flags]);
}
sticky = true;
break;
default:
throw MakeSyntaxError("invalid_regexp_flags", [flags]);
}
}
%RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
%RegExpInitializeObject(object, pattern, global, ignoreCase, multiline, sticky);
// Call internal function to compile the pattern.
%RegExpCompile(object, pattern, flags);
......@@ -159,8 +168,8 @@ function RegExpExec(string) {
// algorithm, step 5) even if the value is discarded for non-global RegExps.
var i = TO_INTEGER(lastIndex);
var global = this.global;
if (global) {
var updateLastIndex = this.global || (harmony_regexps && this.sticky);
if (updateLastIndex) {
if (i < 0 || i > string.length) {
this.lastIndex = 0;
return null;
......@@ -179,7 +188,7 @@ function RegExpExec(string) {
// Successful match.
lastMatchInfoOverride = null;
if (global) {
if (updateLastIndex) {
this.lastIndex = lastMatchInfo[CAPTURE1];
}
RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
......@@ -207,7 +216,7 @@ function RegExpTest(string) {
// algorithm, step 5) even if the value is discarded for non-global RegExps.
var i = TO_INTEGER(lastIndex);
if (this.global) {
if (this.global || (harmony_regexps && this.sticky)) {
if (i < 0 || i > string.length) {
this.lastIndex = 0;
return false;
......@@ -222,12 +231,13 @@ function RegExpTest(string) {
this.lastIndex = lastMatchInfo[CAPTURE1];
return true;
} else {
// Non-global regexp.
// Remove irrelevant preceding '.*' in a non-global test regexp.
// The expression checks whether this.source starts with '.*' and
// that the third char is not a '?'.
// Non-global, non-sticky regexp.
// Remove irrelevant preceding '.*' in a test regexp. The expression
// checks whether this.source starts with '.*' and that the third char is
// not a '?'. But see https://code.google.com/p/v8/issues/detail?id=3560
var regexp = this;
if (%_StringCharCodeAt(regexp.source, 0) == 46 && // '.'
if (regexp.source.length >= 3 &&
%_StringCharCodeAt(regexp.source, 0) == 46 && // '.'
%_StringCharCodeAt(regexp.source, 1) == 42 && // '*'
%_StringCharCodeAt(regexp.source, 2) != 63) { // '?'
regexp = TrimRegExp(regexp);
......@@ -264,6 +274,7 @@ function RegExpToString() {
if (this.global) result += 'g';
if (this.ignoreCase) result += 'i';
if (this.multiline) result += 'm';
if (harmony_regexps && this.sticky) result += 'y';
return result;
}
......
......@@ -2548,7 +2548,7 @@ RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) {
HandleScope scope(isolate);
DCHECK(args.length() == 5);
DCHECK(args.length() == 6);
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
// If source is the empty string we set it to "(?:)" instead as
......@@ -2564,9 +2564,13 @@ RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) {
CONVERT_ARG_HANDLE_CHECKED(Object, multiline, 4);
if (!multiline->IsTrue()) multiline = isolate->factory()->false_value();
CONVERT_ARG_HANDLE_CHECKED(Object, sticky, 5);
if (!sticky->IsTrue()) sticky = isolate->factory()->false_value();
Map* map = regexp->map();
Object* constructor = map->constructor();
if (constructor->IsJSFunction() &&
if (!FLAG_harmony_regexps &&
constructor->IsJSFunction() &&
JSFunction::cast(constructor)->initial_map() == map) {
// If we still have the original map, set in-object properties directly.
regexp->InObjectPropertyAtPut(JSRegExp::kSourceFieldIndex, *source);
......@@ -2583,7 +2587,11 @@ RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) {
return *regexp;
}
// Map has changed, so use generic, but slower, method.
// Map has changed, so use generic, but slower, method. We also end here if
// the --harmony-regexps flag is set, because the initial map does not have
// space for the 'sticky' flag, since it is from the snapshot, but must work
// both with and without --harmony-regexps. When sticky comes out from under
// the flag, we will be able to use the fast initial map.
PropertyAttributes final =
static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE);
PropertyAttributes writable =
......@@ -2598,6 +2606,10 @@ RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) {
regexp, factory->ignore_case_string(), ignoreCase, final).Check();
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->multiline_string(), multiline, final).Check();
if (FLAG_harmony_regexps) {
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->sticky_string(), sticky, final).Check();
}
JSObject::SetOwnPropertyIgnoreAttributes(
regexp, factory->last_index_string(), zero, writable).Check();
return *regexp;
......
......@@ -154,7 +154,7 @@ namespace internal {
/* Regular expressions */ \
F(RegExpCompile, 3, 1) \
F(RegExpExecMultiple, 4, 1) \
F(RegExpInitializeObject, 5, 1) \
F(RegExpInitializeObject, 6, 1) \
\
/* JSON */ \
F(ParseJson, 1, 1) \
......
......@@ -518,7 +518,7 @@ static RegExpNode* Compile(const char* input, bool multiline, bool is_one_byte,
NewStringFromUtf8(CStrVector(input)).ToHandleChecked();
Handle<String> sample_subject =
isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
RegExpEngine::Compile(&compile_data, false, false, multiline, pattern,
RegExpEngine::Compile(&compile_data, false, false, multiline, false, pattern,
sample_subject, is_one_byte, zone);
return compile_data.node;
}
......
// Copyright 2014 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Flags: --harmony-regexps
// Tests for the sticky ('y') regexp flag. Each scenario is exercised with a
// pattern containing a wildcard (/foo.bar/) and with a plain literal pattern
// (/foobar/) -- presumably to cover different regexp compilation paths.

// Baseline: without any flags a regexp matches anywhere in the subject.
var re = /foo.bar/;
assertTrue(!!"foo*bar".match(re));
assertTrue(!!"..foo*bar".match(re));
var plain = /foobar/;
assertTrue(!!"foobar".match(plain));
assertTrue(!!"..foobar".match(plain));
// Sticky, non-global: only a match starting at lastIndex (0 here) is
// accepted, so the subject with a two-character prefix must not match.
// String.prototype.match is expected to leave lastIndex at 0.
var sticky = /foo.bar/y;
assertTrue(!!"foo*bar".match(sticky));
assertEquals(0, sticky.lastIndex);
assertFalse(!!"..foo*bar".match(sticky));
var stickyplain = /foobar/y;
assertTrue(!!"foobar".match(stickyplain));
assertEquals(0, stickyplain.lastIndex);
assertFalse(!!"..foobar".match(stickyplain));
// Global, non-sticky: test() advances lastIndex after a match, so the next
// test() resumes from there and fails until lastIndex is reset to 0.
var global = /foo.bar/g;
assertTrue(global.test("foo*bar"));
assertFalse(global.test("..foo*bar"));
global.lastIndex = 0;
assertTrue(global.test("..foo*bar"));
var plainglobal = /foobar/g;
assertTrue(plainglobal.test("foobar"));
assertFalse(plainglobal.test("foobar"));
plainglobal.lastIndex = 0;
assertTrue(plainglobal.test("foobar"));
// Sticky and global: lastIndex is updated after a match, and a match is only
// accepted when it starts exactly at lastIndex (2 for the prefixed subject).
var stickyglobal = /foo.bar/gy;
assertTrue(stickyglobal.test("foo*bar"));
assertEquals(7, stickyglobal.lastIndex);
assertFalse(stickyglobal.test("..foo*bar"));
stickyglobal.lastIndex = 0;
assertFalse(stickyglobal.test("..foo*bar"));
stickyglobal.lastIndex = 2;
assertTrue(stickyglobal.test("..foo*bar"));
assertEquals(9, stickyglobal.lastIndex);
// Flag order in the literal ('yg' vs 'gy') does not matter. Assigning to
// .sticky must not switch off sticky matching: the assertions that follow
// still expect sticky behavior.
var stickyplainglobal = /foobar/yg;
assertTrue(stickyplainglobal.sticky);
stickyplainglobal.sticky = false;
assertTrue(stickyplainglobal.test("foobar"));
assertEquals(6, stickyplainglobal.lastIndex);
assertFalse(stickyplainglobal.test("..foobar"));
stickyplainglobal.lastIndex = 0;
assertFalse(stickyplainglobal.test("..foobar"));
stickyplainglobal.lastIndex = 2;
assertTrue(stickyplainglobal.test("..foobar"));
assertEquals(8, stickyplainglobal.lastIndex);
// String conversion appends 'y' after the other flags for sticky regexps.
assertEquals("/foo.bar/gy", "" + stickyglobal);
assertEquals("/foo.bar/g", "" + global);
// The sticky property is read-only: the assignment leaves it true.
assertTrue(stickyglobal.sticky);
stickyglobal.sticky = false;
assertTrue(stickyglobal.sticky);
// Same checks for a regexp built with the RegExp constructor instead of a
// literal.
var stickyglobal2 = new RegExp("foo.bar", "gy");
assertTrue(stickyglobal2.test("foo*bar"));
assertEquals(7, stickyglobal2.lastIndex);
assertFalse(stickyglobal2.test("..foo*bar"));
stickyglobal2.lastIndex = 0;
assertFalse(stickyglobal2.test("..foo*bar"));
stickyglobal2.lastIndex = 2;
assertTrue(stickyglobal2.test("..foo*bar"));
assertEquals(9, stickyglobal2.lastIndex);
assertEquals("/foo.bar/gy", "" + stickyglobal2);
assertTrue(stickyglobal2.sticky);
stickyglobal2.sticky = false;
assertTrue(stickyglobal2.sticky);
// An out-of-range lastIndex makes test() and exec() fail without matching
// and resets lastIndex to 0, for sticky regexps just as for global ones.
sticky.lastIndex = -1; // Causes sticky regexp to fail fast
assertFalse(sticky.test("..foo.bar"));
assertEquals(0, sticky.lastIndex);
sticky.lastIndex = -1; // Causes sticky regexp to fail fast
assertFalse(!!sticky.exec("..foo.bar"));
assertEquals(0, sticky.lastIndex);
// ES6 draft says: Even when the y flag is used with a pattern, ^ always
// matches only at the beginning of Input, or (if Multiline is true) at the
// beginning of a line.
var hat = /^foo/y;
hat.lastIndex = 2;
assertFalse(hat.test("..foo"));
// With the multiline flag, ^ also matches right after the "\n" at index 1,
// so the sticky match at lastIndex 2 succeeds only for the second subject.
var mhat = /^foo/my;
mhat.lastIndex = 2;
assertFalse(mhat.test("..foo"));
mhat.lastIndex = 2;
assertTrue(mhat.test(".\nfoo"));
// Copyright 2014 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Test that sticky regexp support is not affecting V8 when the
// --harmony-regexps flag is not on.
// Without the flag, 'y' must be rejected as an invalid regexp flag, both in
// regexp literals (evaluated through eval so that the SyntaxError is raised
// inside the tested function) and in the RegExp constructor.
assertThrows(function() { eval("/foo.bar/y"); }, SyntaxError);
assertThrows(function() { eval("/foobar/y"); }, SyntaxError);
assertThrows(function() { eval("/foo.bar/gy"); }, SyntaxError);
assertThrows(function() { eval("/foobar/gy"); }, SyntaxError);
assertThrows(function() { new RegExp("foo.bar", "y"); }, SyntaxError);
assertThrows(function() { new RegExp("foobar", "y"); }, SyntaxError);
assertThrows(function() { new RegExp("foo.bar", "gy"); }, SyntaxError);
assertThrows(function() { new RegExp("foobar", "gy"); }, SyntaxError);
// String conversion is unchanged, and regexps have no 'sticky' property
// (also not after recompiling with compile()).
var re = /foo.bar/;
assertEquals("/foo.bar/", "" + re);
var plain = /foobar/;
assertEquals("/foobar/", "" + plain);
re.compile("foo.bar");
assertEquals(void 0, re.sticky);
var global = /foo.bar/g;
assertEquals("/foo.bar/g", "" + global);
var plainglobal = /foobar/g;
assertEquals("/foobar/g", "" + plainglobal);
assertEquals(void 0, re.sticky);
// 'sticky' behaves like any ordinary user-defined property here: it can be
// assigned, and the regexp still matches anywhere in the subject.
re.sticky = true; // Has no effect on the regexp, just sets a property.
assertTrue(re.sticky);
assertTrue(re.test("..foo.bar"));
// lastIndex is neither consulted nor modified by test() and exec() on a
// non-global regexp, even though a 'sticky' property is now set.
re.lastIndex = -1; // Ignored for non-global, non-sticky.
assertTrue(re.test("..foo.bar"));
assertEquals(-1, re.lastIndex);
re.lastIndex = -1; // Ignored for non-global, non-sticky.
assertTrue(!!re.exec("..foo.bar"));
assertEquals(-1, re.lastIndex);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment