Commit 912c8eb0 authored by erik.corry@gmail.com's avatar erik.corry@gmail.com

* Reapply revisions 1383, 1384, 1391, 1398, 1401, 1402, 1418, and 1419
  from bleeding_edge, reverted in 1429.
* Fix of $1 accessor on sliced strings.
* Fix of lastParen method when last parenthesis did not match.
Review URL: http://codereview.chromium.org/43075

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1491 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 3c41aab1
......@@ -851,12 +851,13 @@ Handle<Map> Factory::ObjectLiteralMapFromCache(Handle<Context> context,
}
void Factory::SetRegExpData(Handle<JSRegExp> regexp,
JSRegExp::Type type,
Handle<String> source,
JSRegExp::Flags flags,
Handle<Object> data) {
Handle<FixedArray> store = NewFixedArray(JSRegExp::kDataSize);
void Factory::SetRegExpAtomData(Handle<JSRegExp> regexp,
JSRegExp::Type type,
Handle<String> source,
JSRegExp::Flags flags,
Handle<Object> data) {
Handle<FixedArray> store = NewFixedArray(JSRegExp::kAtomDataSize);
store->set(JSRegExp::kTagIndex, Smi::FromInt(type));
store->set(JSRegExp::kSourceIndex, *source);
store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags.value()));
......@@ -864,6 +865,25 @@ void Factory::SetRegExpData(Handle<JSRegExp> regexp,
regexp->set_data(*store);
}
// Allocates the data array for an irregexp regexp, fills in the tag, source,
// flags and capture count, and installs the array on the regexp.  Both code
// slots start out as the hole and the max register count starts at zero.
void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
                                    JSRegExp::Type type,
                                    Handle<String> source,
                                    JSRegExp::Flags flags,
                                    int capture_count) {
  Handle<FixedArray> data = NewFixedArray(JSRegExp::kIrregexpDataSize);

  // Tag, source and flags slots.
  data->set(JSRegExp::kTagIndex, Smi::FromInt(type));
  data->set(JSRegExp::kSourceIndex, *source);
  data->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags.value()));

  // Irregexp-specific slots: no code for either encoding yet.
  data->set(JSRegExp::kIrregexpASCIICodeIndex, Heap::the_hole_value());
  data->set(JSRegExp::kIrregexpUC16CodeIndex, Heap::the_hole_value());
  data->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(0));
  data->set(JSRegExp::kIrregexpCaptureCountIndex, Smi::FromInt(capture_count));

  regexp->set_data(*data);
}
void Factory::ConfigureInstance(Handle<FunctionTemplateInfo> desc,
Handle<JSObject> instance,
......
......@@ -317,12 +317,20 @@ class Factory : public AllStatic {
Handle<FixedArray> keys);
// Creates a new FixedArray that holds the data associated with the
// regexp and stores it in the regexp.
static void SetRegExpData(Handle<JSRegExp> regexp,
JSRegExp::Type type,
Handle<String> source,
JSRegExp::Flags flags,
Handle<Object> data);
// atom regexp and stores it in the regexp.
static void SetRegExpAtomData(Handle<JSRegExp> regexp,
JSRegExp::Type type,
Handle<String> source,
JSRegExp::Flags flags,
Handle<Object> match_pattern);
// Creates a new FixedArray that holds the data associated with the
// irregexp regexp and stores it in the regexp.
static void SetRegExpIrregexpData(Handle<JSRegExp> regexp,
JSRegExp::Type type,
Handle<String> source,
JSRegExp::Flags flags,
int capture_count);
private:
static Handle<JSFunction> NewFunctionHelper(Handle<String> name,
......
This diff is collapsed.
......@@ -51,6 +51,7 @@ class RegExpImpl {
// Parses the RegExp pattern and prepares the JSRegExp object with
// generic data and choice of implementation - as well as what
// the implementation wants to store in the data field.
// Returns false if compilation fails.
static Handle<Object> Compile(Handle<JSRegExp> re,
Handle<String> pattern,
Handle<String> flags);
......@@ -59,38 +60,46 @@ class RegExpImpl {
// This function calls the garbage collector if necessary.
static Handle<Object> Exec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
int index,
Handle<JSArray> lastMatchInfo);
// Call RegExp.prototype.exec(string) in a loop.
// Used by String.prototype.match and String.prototype.replace.
// This function calls the garbage collector if necessary.
static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
Handle<String> subject,
Handle<JSArray> lastMatchInfo);
// Prepares a JSRegExp object with Irregexp-specific data.
static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags);
static void IrregexpPrepare(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags,
int capture_register_count);
static Handle<Object> AtomCompile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags,
Handle<String> match_pattern);
static void AtomCompile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags,
Handle<String> match_pattern);
static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
int index,
Handle<JSArray> lastMatchInfo);
static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
Handle<String> subject,
Handle<JSArray> lastMatchInfo);
// Execute an Irregexp bytecode pattern.
static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
int index,
Handle<JSArray> lastMatchInfo);
static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
Handle<String> subject,
Handle<JSArray> lastMatchInfo);
static void NewSpaceCollectionPrologue();
static void OldSpaceCollectionPrologue();
......@@ -101,26 +110,54 @@ class RegExpImpl {
static Handle<String> StringToTwoByte(Handle<String> pattern);
static Handle<String> CachedStringToTwoByte(Handle<String> pattern);
static const int kIrregexpImplementationIndex = 0;
static const int kIrregexpNumberOfCapturesIndex = 1;
static const int kIrregexpNumberOfRegistersIndex = 2;
static const int kIrregexpCodeIndex = 3;
static const int kIrregexpDataLength = 4;
// Offsets in the lastMatchInfo array.
// Slot holding the capture count (read by GetLastCaptureCount).
static const int kLastCaptureCount = 0;
// kLastSubject and kLastInput are not absolute indices: SetLastSubject and
// SetLastInput add them to the stored capture count.
static const int kLastSubject = 1;
static const int kLastInput = 2;
// Added to a capture index by GetCapture and SetCapture.
static const int kFirstCapture = 1;
// Presumably the number of non-capture slots (count, subject, input) --
// TODO confirm against the users of this constant.
static const int kLastMatchOverhead = 3;
// Returns the integer stored for capture |index|; capture slots begin at
// kFirstCapture.
static int GetCapture(FixedArray* array, int index) {
  const int slot = kFirstCapture + index;
  return Smi::cast(array->get(slot))->value();
}
// Stores |to| as a Smi in the kLastCaptureCount slot.
static void SetLastCaptureCount(FixedArray* array, int to) {
  Smi* count = Smi::FromInt(to);
  array->set(kLastCaptureCount, count);
}
// Stores |to| in the subject slot, located kLastSubject entries past the
// capture count currently recorded in |array|.
static void SetLastSubject(FixedArray* array, String* to) {
  array->set(GetLastCaptureCount(array) + kLastSubject, to);
}
// Stores |to| in the input slot, located kLastInput entries past the
// capture count currently recorded in |array|.
static void SetLastInput(FixedArray* array, String* to) {
  array->set(GetLastCaptureCount(array) + kLastInput, to);
}
// Stores |to| as a Smi in the slot for capture |index|; capture slots begin
// at kFirstCapture.
static void SetCapture(FixedArray* array, int index, int to) {
  const int slot = kFirstCapture + index;
  array->set(slot, Smi::FromInt(to));
}
private:
static String* last_ascii_string_;
static String* two_byte_cached_string_;
static int IrregexpNumberOfCaptures(Handle<FixedArray> re);
static int IrregexpNumberOfRegisters(Handle<FixedArray> re);
static Handle<ByteArray> IrregexpByteCode(Handle<FixedArray> re);
static Handle<Code> IrregexpNativeCode(Handle<FixedArray> re);
static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
static int IrregexpMaxRegisterCount(FixedArray* re);
static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
static int IrregexpNumberOfCaptures(FixedArray* re);
static int IrregexpNumberOfRegisters(FixedArray* re);
static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii);
static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii);
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
static Handle<Object> IrregexpExecOnce(Handle<FixedArray> regexp,
int num_captures,
Handle<JSArray> lastMatchInfo,
Handle<String> subject16,
int previous_index,
int* ovector,
......@@ -134,6 +171,10 @@ class RegExpImpl {
int character_position,
int utf8_position);
// Reads the capture count kept in the kLastCaptureCount slot of the
// lastMatchInfo array.
static int GetLastCaptureCount(FixedArray* array) {
  Smi* count = Smi::cast(array->get(kLastCaptureCount));
  return count->value();
}
// A one element cache of the last utf8_subject string and its length. The
// subject JS String object is cached in the heap. We also cache a
// translation between position and utf8 position.
......@@ -1319,11 +1360,25 @@ struct RegExpCompileData {
class RegExpEngine: public AllStatic {
public:
static Handle<FixedArray> Compile(RegExpCompileData* input,
bool ignore_case,
bool multiline,
Handle<String> pattern,
bool is_ascii);
// Value returned by RegExpEngine::Compile: either an error message, or a
// code object together with a register count.
struct CompilationResult {
  // NULL when the result was built from code and a register count.
  const char* error_message;
  // The hole value when the result was built from an error message.
  Object* code;
  // Zero when the result was built from an error message.
  int num_registers;

  // Builds a result that carries only an error message.
  explicit CompilationResult(const char* error_message)
      : error_message(error_message),
        code(Heap::the_hole_value()),
        num_registers(0) {}

  // Builds a result that carries code and its register count.
  CompilationResult(Object* code, int registers)
      : error_message(NULL), code(code), num_registers(registers) {}
};
static CompilationResult Compile(RegExpCompileData* input,
bool ignore_case,
bool multiline,
Handle<String> pattern,
bool is_ascii);
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};
......
# Copyright 2006-2008 the V8 project authors. All rights reserved.
# Copyright 2006-2009 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
......@@ -99,3 +99,22 @@ python macro CHAR_CODE(str) = ord(str[1]);
# Accessors for original global properties that ensure they have been loaded.
const ORIGINAL_REGEXP = (global.RegExp, $RegExp);
const ORIGINAL_DATE = (global.Date, $Date);
# Constants used on an array to implement the properties of the RegExp object.
const REGEXP_NUMBER_OF_CAPTURES = 0;
const REGEXP_FIRST_CAPTURE = 1;
# We can't put macros in macros so we use constants here.
# REGEXP_NUMBER_OF_CAPTURES
macro NUMBER_OF_CAPTURES(array) = ((array)[0]);
# Last input and last subject are after the captures so we can omit them on
# results returned from global searches. Beware - these evaluate their
# arguments twice.
macro LAST_SUBJECT(array) = ((array)[(array)[0] + 1]);
macro LAST_INPUT(array) = ((array)[(array)[0] + 2]);
# REGEXP_FIRST_CAPTURE
macro CAPTURE(index) = (1 + (index));
const CAPTURE0 = 1;
const CAPTURE1 = 2;
......@@ -696,9 +696,20 @@ void JSRegExp::JSRegExpVerify() {
break;
}
case JSRegExp::IRREGEXP: {
bool is_native = FLAG_regexp_native;
#ifdef ARM
// No native regexp on arm yet.
is_native = false;
#endif
FixedArray* arr = FixedArray::cast(data());
Object* irregexp_data = arr->get(JSRegExp::kIrregexpDataIndex);
ASSERT(irregexp_data->IsFixedArray());
Object* ascii_data = arr->get(JSRegExp::kIrregexpASCIICodeIndex);
ASSERT(ascii_data->IsTheHole()
|| (is_native ? ascii_data->IsCode() : ascii_data->IsByteArray()));
Object* uc16_data = arr->get(JSRegExp::kIrregexpUC16CodeIndex);
ASSERT(uc16_data->IsTheHole()
|| (is_native ? uc16_data->IsCode() : uc16_data->IsByteArray()));
ASSERT(arr->get(JSRegExp::kIrregexpCaptureCountIndex)->IsSmi());
ASSERT(arr->get(JSRegExp::kIrregexpMaxRegisterCountIndex)->IsSmi());
break;
}
default:
......
......@@ -2338,6 +2338,13 @@ Object* JSRegExp::DataAt(int index) {
}
void JSRegExp::SetDataAt(int index, Object* value) {
ASSERT(TypeTag() != NOT_COMPILED);
ASSERT(index >= kDataIndex); // Only implementation data can be set this way.
FixedArray::cast(data())->set(index, value);
}
// True unless the elements backing store is a dictionary.
bool JSObject::HasFastElements() {
  if (elements()->IsDictionary()) return false;
  return true;
}
......
......@@ -4888,6 +4888,22 @@ Object* JSArray::Initialize(int capacity) {
}
void JSArray::EnsureSize(int required_size) {
Handle<JSArray> self(this);
ASSERT(HasFastElements());
if (elements()->length() >= required_size) return;
Handle<FixedArray> old_backing(elements());
int old_size = old_backing->length();
// Doubling in size would be overkill, but leave some slack to avoid
// constantly growing.
int new_size = required_size + (required_size >> 3);
Handle<FixedArray> new_backing = Factory::NewFixedArray(new_size);
// Can't use this any more now because we may have had a GC!
for (int i = 0; i < old_size; i++) new_backing->set(i, old_backing->get(i));
self->SetContent(*new_backing);
}
// Computes the new capacity when expanding the elements of a JSObject.
static int NewElementsCapacity(int old_capacity) {
// (old_capacity + 50%) + 16
......
......@@ -2966,6 +2966,19 @@ class JSValue: public JSObject {
};
// Regular expressions
// The regular expression holds a single reference to a FixedArray in
// the kDataOffset field.
// The FixedArray contains the following data:
// - tag : type of regexp implementation (not compiled yet, atom or irregexp)
// - reference to the original source string
// - the regexp flags, stored as a Smi (see kFlagsIndex)
// If it is an atom regexp
// - a reference to a literal string to search for
// If it is an irregexp regexp:
// - a reference to code for ASCII inputs (bytecode or compiled).
// - a reference to code for UC16 inputs (bytecode or compiled).
// - max number of registers used by irregexp implementations.
// - number of capture registers (output values) of the regexp.
class JSRegExp: public JSObject {
public:
// Meaning of Type:
......@@ -2993,6 +3006,8 @@ class JSRegExp: public JSObject {
inline Flags GetFlags();
inline String* Pattern();
inline Object* DataAt(int index);
// Set implementation data after the object has been prepared.
inline void SetDataAt(int index, Object* value);
static inline JSRegExp* cast(Object* obj);
......@@ -3004,14 +3019,29 @@ class JSRegExp: public JSObject {
static const int kDataOffset = JSObject::kHeaderSize;
static const int kSize = kDataOffset + kIntSize;
// Indices in the data array.
static const int kTagIndex = 0;
static const int kSourceIndex = kTagIndex + 1;
static const int kFlagsIndex = kSourceIndex + 1;
// These two are the same since the same entry is shared for
// different purposes in different types of regexps.
static const int kAtomPatternIndex = kFlagsIndex + 1;
static const int kIrregexpDataIndex = kFlagsIndex + 1;
static const int kDataSize = kAtomPatternIndex + 1;
static const int kDataIndex = kFlagsIndex + 1;
// The data fields are used in different ways depending on the
// value of the tag.
// Atom regexps (literal strings).
static const int kAtomPatternIndex = kDataIndex;
static const int kAtomDataSize = kAtomPatternIndex + 1;
// Irregexp compiled code or bytecode for ASCII.
static const int kIrregexpASCIICodeIndex = kDataIndex;
// Irregexp compiled code or bytecode for UC16.
static const int kIrregexpUC16CodeIndex = kDataIndex + 1;
// Maximal number of registers used by either ASCII or UC16.
// Only used to check that there is enough stack space.
static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 2;
// Number of captures in the compiled regexp.
static const int kIrregexpCaptureCountIndex = kDataIndex + 3;
static const int kIrregexpDataSize = kIrregexpCaptureCountIndex + 1;
};
......@@ -3806,6 +3836,10 @@ class JSArray: public JSObject {
// Casting.
static inline JSArray* cast(Object* obj);
// Uses handles. Ensures that the fixed array backing the JSArray has at
// least the stated size.
void EnsureSize(int minimum_size_of_backing_fixed_array);
// Dispatched behavior.
#ifdef DEBUG
void JSArrayPrint();
......
This diff is collapsed.
......@@ -930,14 +930,21 @@ static Object* Runtime_InitializeConstContextSlot(Arguments args) {
static Object* Runtime_RegExpExec(Arguments args) {
HandleScope scope;
ASSERT(args.length() == 3);
ASSERT(args.length() == 4);
CONVERT_CHECKED(JSRegExp, raw_regexp, args[0]);
Handle<JSRegExp> regexp(raw_regexp);
CONVERT_CHECKED(String, raw_subject, args[1]);
Handle<String> subject(raw_subject);
Handle<Object> index(args[2]);
ASSERT(index->IsNumber());
Handle<Object> result = RegExpImpl::Exec(regexp, subject, index);
// Due to the way the JS files are constructed this must be less than the
// length of a string, i.e. it is always a Smi. We check anyway for security.
CONVERT_CHECKED(Smi, index, args[2]);
CONVERT_CHECKED(JSArray, raw_last_match_info, args[3]);
Handle<JSArray> last_match_info(raw_last_match_info);
CHECK(last_match_info->HasFastElements());
Handle<Object> result = RegExpImpl::Exec(regexp,
subject,
index->value(),
last_match_info);
if (result.is_null()) return Failure::Exception();
return *result;
}
......@@ -945,12 +952,16 @@ static Object* Runtime_RegExpExec(Arguments args) {
static Object* Runtime_RegExpExecGlobal(Arguments args) {
HandleScope scope;
ASSERT(args.length() == 2);
ASSERT(args.length() == 3);
CONVERT_CHECKED(JSRegExp, raw_regexp, args[0]);
Handle<JSRegExp> regexp(raw_regexp);
CONVERT_CHECKED(String, raw_subject, args[1]);
Handle<String> subject(raw_subject);
Handle<Object> result = RegExpImpl::ExecGlobal(regexp, subject);
CONVERT_CHECKED(JSArray, raw_last_match_info, args[2]);
Handle<JSArray> last_match_info(raw_last_match_info);
CHECK(last_match_info->HasFastElements());
Handle<Object> result =
RegExpImpl::ExecGlobal(regexp, subject, last_match_info);
if (result.is_null()) return Failure::Exception();
return *result;
}
......
......@@ -137,8 +137,8 @@ namespace v8 { namespace internal {
\
/* Regular expressions */ \
F(RegExpCompile, 3) \
F(RegExpExec, 3) \
F(RegExpExecGlobal, 2) \
F(RegExpExec, 4) \
F(RegExpExecGlobal, 3) \
\
/* Strings */ \
F(StringCharCodeAt, 2) \
......
This diff is collapsed.
......@@ -154,3 +154,14 @@ for (var i = 1; i <= 9; i++) {
}
assertEquals("", RegExp['$' + (i)], "$" + i);
}
// Assigning a non-boolean to RegExp.multiline must leave a boolean behind.
RegExp.multiline = "foo";
assertTrue(typeof RegExp.multiline == typeof Boolean(), "RegExp.multiline coerces values to booleans");
// Assigning a non-string to RegExp.input must leave a string behind.
RegExp.input = Number();
assertTrue(typeof RegExp.input == typeof String(), "RegExp.input coerces values to strings");
// Ensure that we save the correct string as the last subject when
// we do a match on a sliced string (the top one not the underlying).
var foo = "lsdfj sldkfj sdklfj læsdfjl sdkfjlsdk fjsdl fjsdljskdj flsj flsdkj flskd regexp: /foobar/\nldkfj sdlkfj sdkl";
assertTrue(/^([a-z]+): (.*)/.test(foo.substring(foo.indexOf("regexp:"))), "regexp: setup");
assertEquals("regexp", RegExp.$1, "RegExp.$1");
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Regexp shouldn't use String.prototype.slice()
var s = new String("foo");
// Sanity check: the built-in slice works before it is replaced.
assertEquals("f", s.slice(0,1));
// Replace slice with a stub that always returns "x" and confirm the stub is
// what string code now sees.
String.prototype.slice = function() { return "x"; }
assertEquals("x", s.slice(0,1));
// exec must still report the real match rather than the stub's "x".
assertEquals("g", /g/.exec("gg"));
// Regexp shouldn't use String.prototype.charAt()
var f1 = new RegExp("f", "i");
// Baseline: case-insensitive exec and the built-in charAt before the override.
assertEquals("F", f1.exec("F"));
assertEquals("f", "foo".charAt(0));
// Replace charAt with a stub that always returns 'g' and confirm it is live.
String.prototype.charAt = function(idx) { return 'g'; };
assertEquals("g", "foo".charAt(0));
// A regexp created after the override must still match the actual subject
// character and keep its ignoreCase flag.
var f2 = new RegExp("[g]", "i");
assertEquals("G", f2.exec("G"));
assertTrue(f2.ignoreCase);
// On the other hand test is defined in a semi-coherent way as a call to exec.
// 15.10.6.3
// We match other browsers in using the original value of RegExp.prototype.exec.
// I.e., RegExp.prototype.test shouldn't use the current value of
// RegExp.prototype.exec.
// The stub below returns the non-empty string 'x' for every call; the
// assertion expects test() to report that /f/ does not match 'x' regardless.
RegExp.prototype.exec = function(string) { return 'x'; }
assertFalse(/f/.test('x'));
......@@ -104,7 +104,7 @@ def ExpandConstants(lines, constants):
def ExpandMacros(lines, macros):
for name, macro in macros.items():
start = lines.find(name, 0)
start = lines.find(name + '(', 0)
while start != -1:
# Scan over the arguments
assert lines[start + len(name)] == '('
......@@ -132,7 +132,7 @@ def ExpandMacros(lines, macros):
result = macro.expand(mapping)
# Replace the occurrence of the macro with the expansion
lines = lines[:start] + result + lines[end:]
start = lines.find(name, end)
start = lines.find(name + '(', end)
return lines
class TextMacro:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment