Reverted the changes to jscre because the ARM cross-compiler dies with
an internal error when compiling the templatized jscre.  We really need
to update that damn ARM compiler.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@478 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 89ac41af
...@@ -223,6 +223,7 @@ void Heap::ReportStatisticsAfterGC() { ...@@ -223,6 +223,7 @@ void Heap::ReportStatisticsAfterGC() {
void Heap::GarbageCollectionPrologue() { void Heap::GarbageCollectionPrologue() {
RegExpImpl::NewSpaceCollectionPrologue();
gc_count_++; gc_count_++;
#ifdef DEBUG #ifdef DEBUG
ASSERT(allocation_allowed_ && gc_state_ == NOT_IN_GC); ASSERT(allocation_allowed_ && gc_state_ == NOT_IN_GC);
...@@ -423,6 +424,7 @@ void Heap::MarkCompact(GCTracer* tracer) { ...@@ -423,6 +424,7 @@ void Heap::MarkCompact(GCTracer* tracer) {
void Heap::MarkCompactPrologue() { void Heap::MarkCompactPrologue() {
CompilationCache::MarkCompactPrologue(); CompilationCache::MarkCompactPrologue();
RegExpImpl::OldSpaceCollectionPrologue();
Top::MarkCompactPrologue(); Top::MarkCompactPrologue();
ThreadManager::MarkCompactPrologue(); ThreadManager::MarkCompactPrologue();
} }
......
...@@ -65,6 +65,27 @@ static void JSREFree(void* p) { ...@@ -65,6 +65,27 @@ static void JSREFree(void* p) {
} }
// One-entry cache used by CachedStringToTwoByte: the subject string most
// recently converted and the two byte string that was produced for it.
// Both start out as NULL, meaning that nothing is cached.
String* RegExpImpl::last_ascii_string_ = NULL;
String* RegExpImpl::two_byte_cached_string_ = NULL;
// Called before a new space collection.  Drops the cached string conversion
// when the cached source string lives in new space.
void RegExpImpl::NewSpaceCollectionPrologue() {
  // The two byte string is always in the old space, so only the location of
  // the Ascii string matters.  If that one is in the old space as well there
  // is nothing to do.
  if (!Heap::InNewSpace(last_ascii_string_)) return;

  // Invalidate both halves of the cache entry together.
  two_byte_cached_string_ = NULL;
  last_ascii_string_ = NULL;
}
// Called before an old space collection.  Unconditionally forgets the cached
// string conversion.
void RegExpImpl::OldSpaceCollectionPrologue() {
  two_byte_cached_string_ = NULL;
  last_ascii_string_ = NULL;
}
Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor, Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
Handle<String> pattern, Handle<String> pattern,
Handle<String> flags, Handle<String> flags,
...@@ -81,6 +102,47 @@ Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor, ...@@ -81,6 +102,47 @@ Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
} }
// Same conversion as StringToTwoByte, but remembers the most recent subject
// and its converted form so that converting the same subject again returns
// the remembered result instead of converting once more.
Handle<String> RegExpImpl::CachedStringToTwoByte(Handle<String> subject) {
  const bool cache_hit = (*subject == last_ascii_string_);
  if (!cache_hit) {
    // Miss: convert, then record the new (source, converted) pair.
    Handle<String> converted = StringToTwoByte(subject);
    last_ascii_string_ = *subject;
    two_byte_cached_string_ = *converted;
    return converted;
  }

  // Hit: the two cache fields are always written together.
  ASSERT(two_byte_cached_string_ != NULL);
  return Handle<String>(String::cast(two_byte_cached_string_));
}
// Produces a 16 bit version of |pattern|.  A string that is not in Ascii
// representation is returned as is (after flattening); an Ascii string is
// copied character by character into a freshly allocated two byte string.
Handle<String> RegExpImpl::StringToTwoByte(Handle<String> pattern) {
  if (!pattern->IsFlat()) FlattenString(pattern);

  // For a cons string, continue with its first part.
  String* raw_flat = *pattern;
  if (pattern->IsConsString()) {
    raw_flat = String::cast(ConsString::cast(*pattern)->first());
  }
  Handle<String> flat(raw_flat);
  ASSERT(!flat->IsConsString());
  ASSERT(flat->IsSeqString() || flat->IsSlicedString() ||
         flat->IsExternalString());

  // Nothing to convert unless the characters are stored as Ascii.
  if (!flat->IsAsciiRepresentation()) return flat;

  Handle<String> wide = Factory::NewRawTwoByteString(flat->length(), TENURED);
  static StringInputBuffer ascii_reader;
  ascii_reader.Reset(*flat);
  int position = 0;
  while (ascii_reader.has_more()) {
    wide->Set(position, ascii_reader.GetNext());
    position++;
  }
  return wide;
}
unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char; unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char;
...@@ -127,14 +189,7 @@ Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, ...@@ -127,14 +189,7 @@ Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) { Handle<String> subject) {
switch (regexp->type_tag()) { switch (regexp->type_tag()) {
case JSRegExp::JSCRE: case JSRegExp::JSCRE:
FlattenString(subject); return JsreExecGlobal(regexp, subject);
if (subject->IsAsciiRepresentation()) {
Vector<const char> contents = subject->ToAsciiVector();
return JsreExecGlobal(regexp, subject, contents);
} else {
Vector<const uc16> contents = subject->ToUC16Vector();
return JsreExecGlobal(regexp, subject, contents);
}
case JSRegExp::ATOM: case JSRegExp::ATOM:
return AtomExecGlobal(regexp, subject); return AtomExecGlobal(regexp, subject);
default: default:
...@@ -213,6 +268,8 @@ Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re, ...@@ -213,6 +268,8 @@ Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re,
if (flags->Get(i) == 'm') multiline_option = JSRegExpMultiline; if (flags->Get(i) == 'm') multiline_option = JSRegExpMultiline;
} }
Handle<String> two_byte_pattern = StringToTwoByte(pattern);
unsigned number_of_captures; unsigned number_of_captures;
const char* error_message = NULL; const char* error_message = NULL;
...@@ -222,31 +279,14 @@ Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re, ...@@ -222,31 +279,14 @@ Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re,
bool first_time = true; bool first_time = true;
while (true) { while (true) {
first_time = false;
malloc_failure = Failure::Exception(); malloc_failure = Failure::Exception();
if (pattern->IsAsciiRepresentation()) { code = jsRegExpCompile(two_byte_pattern->GetTwoByteData(),
Vector<const char> contents = pattern->ToAsciiVector(); pattern->length(), case_option,
code = jsRegExpCompile(contents.start(), multiline_option, &number_of_captures,
contents.length(), &error_message, &JSREMalloc, &JSREFree);
case_option,
multiline_option,
&number_of_captures,
&error_message,
&JSREMalloc,
&JSREFree);
} else {
Vector<const uc16> contents = pattern->ToUC16Vector();
code = jsRegExpCompile(contents.start(),
contents.length(),
case_option,
multiline_option,
&number_of_captures,
&error_message,
&JSREMalloc,
&JSREFree);
}
if (code == NULL) { if (code == NULL) {
if (first_time && malloc_failure->IsRetryAfterGC()) { if (first_time && malloc_failure->IsRetryAfterGC()) {
first_time = false;
if (!Heap::CollectGarbage(malloc_failure->requested(), if (!Heap::CollectGarbage(malloc_failure->requested(),
malloc_failure->allocation_space())) { malloc_failure->allocation_space())) {
// TODO(1181417): Fix this. // TODO(1181417): Fix this.
...@@ -287,12 +327,11 @@ Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re, ...@@ -287,12 +327,11 @@ Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re,
} }
template <typename T>
Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp, Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp,
int num_captures, int num_captures,
Handle<String> subject, Handle<String> subject,
int previous_index, int previous_index,
Vector<const T> contents, const uc16* two_byte_subject,
int* offsets_vector, int* offsets_vector,
int offsets_vector_length) { int offsets_vector_length) {
int rc; int rc;
...@@ -304,12 +343,12 @@ Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp, ...@@ -304,12 +343,12 @@ Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp,
LOG(RegExpExecEvent(regexp, previous_index, subject)); LOG(RegExpExecEvent(regexp, previous_index, subject));
rc = jsRegExpExecute<T>(js_regexp, rc = jsRegExpExecute(js_regexp,
contents.start(), two_byte_subject,
contents.length(), subject->length(),
previous_index, previous_index,
offsets_vector, offsets_vector,
offsets_vector_length); offsets_vector_length);
} }
// The KJS JavaScript engine returns null (ie, a failed match) when // The KJS JavaScript engine returns null (ie, a failed match) when
...@@ -391,29 +430,19 @@ Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp, ...@@ -391,29 +430,19 @@ Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,
int previous_index = static_cast<int>(DoubleToInteger(index->Number())); int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
FlattenString(subject); Handle<String> subject16 = CachedStringToTwoByte(subject);
if (subject->IsAsciiRepresentation()) {
Vector<const char> contents = subject->ToAsciiVector(); Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
Handle<Object> result(JsreExecOnce(regexp, num_captures, subject, previous_index,
previous_index, subject16->GetTwoByteData(),
contents, offsets.vector(), offsets.length()));
offsets.vector(), offsets.length()));
return result; return result;
} else {
Vector<const uc16> contents = subject->ToUC16Vector();
Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
previous_index,
contents,
offsets.vector(), offsets.length()));
return result;
}
} }
template <typename T>
Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp, Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject, Handle<String> subject) {
Vector<const T> contents) {
// Prepare space for the return values. // Prepare space for the return values.
int num_captures = JsreCapture(regexp); int num_captures = JsreCapture(regexp);
...@@ -425,19 +454,17 @@ Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp, ...@@ -425,19 +454,17 @@ Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
int i = 0; int i = 0;
Handle<Object> matches; Handle<Object> matches;
Handle<String> subject16 = CachedStringToTwoByte(subject);
do { do {
if (previous_index > subject->length() || previous_index < 0) { if (previous_index > subject->length() || previous_index < 0) {
// Per ECMA-262 15.10.6.2, if the previous index is greater than the // Per ECMA-262 15.10.6.2, if the previous index is greater than the
// string length, there is no match. // string length, there is no match.
matches = Factory::null_value(); matches = Factory::null_value();
} else { } else {
matches = JsreExecOnce<T>(regexp, matches = JsreExecOnce(regexp, num_captures, subject, previous_index,
num_captures, subject16->GetTwoByteData(),
subject, offsets.vector(), offsets.length());
previous_index,
contents,
offsets.vector(),
offsets.length());
if (matches->IsJSArray()) { if (matches->IsJSArray()) {
SetElement(result, i, matches); SetElement(result, i, matches);
......
...@@ -79,24 +79,32 @@ class RegExpImpl { ...@@ -79,24 +79,32 @@ class RegExpImpl {
Handle<String> subject, Handle<String> subject,
Handle<Object> index); Handle<Object> index);
template <typename T>
static Handle<Object> JsreExecGlobal(Handle<JSRegExp> regexp, static Handle<Object> JsreExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject, Handle<String> subject);
Vector<const T> contents);
static void NewSpaceCollectionPrologue();
static void OldSpaceCollectionPrologue();
private: private:
// Converts a source string to a 16 bit flat string. The string
// will be either sequential or it will be a SlicedString backed
// by a flat string.
static Handle<String> StringToTwoByte(Handle<String> pattern);
static Handle<String> CachedStringToTwoByte(Handle<String> pattern);
static String* last_ascii_string_;
static String* two_byte_cached_string_;
// Returns the capture from the re. // Returns the capture from the re.
static int JsreCapture(Handle<JSRegExp> re); static int JsreCapture(Handle<JSRegExp> re);
static ByteArray* JsreInternal(Handle<JSRegExp> re); static ByteArray* JsreInternal(Handle<JSRegExp> re);
// Call jsRegExpExecute once // Call jsRegExpExecute once
template <typename T>
static Handle<Object> JsreExecOnce(Handle<JSRegExp> regexp, static Handle<Object> JsreExecOnce(Handle<JSRegExp> regexp,
int num_captures, int num_captures,
Handle<String> subject, Handle<String> subject,
int previous_index, int previous_index,
Vector<const T> contents, const uc16* utf8_subject,
int* ovector, int* ovector,
int ovector_length); int ovector_length);
......
...@@ -964,21 +964,7 @@ Object* JSObject::AddFastProperty(String* name, ...@@ -964,21 +964,7 @@ Object* JSObject::AddFastProperty(String* name,
return AddSlowProperty(name, value, attributes); return AddSlowProperty(name, value, attributes);
} }
// Replace a CONSTANT_TRANSITION flag with a transition.
// Do this by removing it, and the standard code for adding a map transition
// will then run.
DescriptorArray* old_descriptors = map()->instance_descriptors(); DescriptorArray* old_descriptors = map()->instance_descriptors();
int old_name_index = old_descriptors->Search(name);
bool constant_transition = false; // Only used in assertions.
if (old_name_index != DescriptorArray::kNotFound && CONSTANT_TRANSITION ==
PropertyDetails(old_descriptors->GetDetails(old_name_index)).type()) {
constant_transition = true;
Object* r = old_descriptors->CopyRemove(name);
if (r->IsFailure()) return r;
old_descriptors = DescriptorArray::cast(r);
old_name_index = DescriptorArray::kNotFound;
}
// Compute the new index for new field. // Compute the new index for new field.
int index = map()->NextFreePropertyIndex(); int index = map()->NextFreePropertyIndex();
...@@ -993,64 +979,43 @@ Object* JSObject::AddFastProperty(String* name, ...@@ -993,64 +979,43 @@ Object* JSObject::AddFastProperty(String* name,
bool allow_map_transition = bool allow_map_transition =
!old_descriptors->Contains(name) && !old_descriptors->Contains(name) &&
(Top::context()->global_context()->object_function()->map() != map()); (Top::context()->global_context()->object_function()->map() != map());
ASSERT(allow_map_transition || !constant_transition);
if (map()->unused_property_fields() > 0) { ASSERT(index < properties()->length() ||
ASSERT(index < properties()->length()); map()->unused_property_fields() == 0);
// Allocate a new map for the object. // Allocate a new map for the object.
Object* r = map()->Copy(); Object* r = map()->Copy();
if (r->IsFailure()) return r;
Map* new_map = Map::cast(r);
if (allow_map_transition) {
// Allocate new instance descriptors for the old map with map transition.
MapTransitionDescriptor d(name, Map::cast(new_map), attributes);
Object* r = old_descriptors->CopyInsert(&d, KEEP_TRANSITIONS);
if (r->IsFailure()) return r; if (r->IsFailure()) return r;
Map* new_map = Map::cast(r); old_descriptors = DescriptorArray::cast(r);
if (allow_map_transition) { }
// Allocate new instance descriptors for the old map with map transition.
MapTransitionDescriptor d(name, Map::cast(new_map), attributes);
Object* r = old_descriptors->CopyInsert(&d, KEEP_TRANSITIONS);
if (r->IsFailure()) return r;
old_descriptors = DescriptorArray::cast(r);
}
// We have now allocated all the necessary objects.
// All the changes can be applied at once, so they are atomic.
map()->set_instance_descriptors(old_descriptors);
new_map->set_instance_descriptors(DescriptorArray::cast(new_descriptors));
new_map->set_unused_property_fields(map()->unused_property_fields() - 1);
set_map(new_map);
properties()->set(index, value);
} else {
ASSERT(map()->unused_property_fields() == 0);
if (map()->unused_property_fields() == 0) {
if (properties()->length() > kMaxFastProperties) { if (properties()->length() > kMaxFastProperties) {
Object* obj = NormalizeProperties(); Object* obj = NormalizeProperties();
if (obj->IsFailure()) return obj; if (obj->IsFailure()) return obj;
return AddSlowProperty(name, value, attributes); return AddSlowProperty(name, value, attributes);
} }
static const int kExtraFields = 3;
// Make room for the new value // Make room for the new value
Object* values = Object* values =
properties()->CopySize(properties()->length() + kExtraFields); properties()->CopySize(properties()->length() + kFieldsAdded);
if (values->IsFailure()) return values; if (values->IsFailure()) return values;
FixedArray::cast(values)->set(index, value);
// Allocate a new map for the object.
Object* r = map()->Copy();
if (r->IsFailure()) return r;
Map* new_map = Map::cast(r);
if (allow_map_transition) {
MapTransitionDescriptor d(name, Map::cast(new_map), attributes);
// Allocate new instance descriptors for the old map with map transition.
Object* r = old_descriptors->CopyInsert(&d, KEEP_TRANSITIONS);
if (r->IsFailure()) return r;
old_descriptors = DescriptorArray::cast(r);
}
// We have now allocated all the necessary objects.
// All changes can be done at once, atomically.
map()->set_instance_descriptors(old_descriptors);
new_map->set_instance_descriptors(DescriptorArray::cast(new_descriptors));
new_map->set_unused_property_fields(kExtraFields - 1);
set_map(new_map);
set_properties(FixedArray::cast(values)); set_properties(FixedArray::cast(values));
new_map->set_unused_property_fields(kFieldsAdded - 1);
} else {
new_map->set_unused_property_fields(map()->unused_property_fields() - 1);
} }
// We have now allocated all the necessary objects.
// All the changes can be applied at once, so they are atomic.
map()->set_instance_descriptors(old_descriptors);
new_map->set_instance_descriptors(DescriptorArray::cast(new_descriptors));
set_map(new_map);
properties()->set(index, value);
return value; return value;
} }
...@@ -1104,74 +1069,6 @@ Object* JSObject::AddConstantFunctionProperty(String* name, ...@@ -1104,74 +1069,6 @@ Object* JSObject::AddConstantFunctionProperty(String* name,
} }
// Replaces the constant function property |name| of this object with |value|.
// Returns |value| on success.  If an allocation fails, the Failure object
// produced by that allocation is returned instead.
Object* JSObject::ReplaceConstantFunctionProperty(String* name,
Object* value) {
// There are two situations to handle here:
// 1: Replace a constant function with another function.
// 2: Replace a constant function with an object.
if (value->IsJSFunction()) {
JSFunction* function = JSFunction::cast(value);
// Switch the object to a copy of its map made by CopyDropTransitions
// before touching the descriptor entry.
Object* new_map = map()->CopyDropTransitions();
if (new_map->IsFailure()) return new_map;
set_map(Map::cast(new_map));
// Replace the function entry
int index = map()->instance_descriptors()->Search(name);
ASSERT(index != DescriptorArray::kNotFound);
map()->instance_descriptors()->ReplaceConstantFunction(index, function);
} else {
// Allocate new instance descriptors with updated property index.
int index = map()->NextFreePropertyIndex();
Object* new_descriptors =
map()->instance_descriptors()->CopyReplace(name, index, NONE);
if (new_descriptors->IsFailure()) return new_descriptors;
if (map()->unused_property_fields() > 0) {
// The properties array still has a free slot at |index|.
ASSERT(index < properties()->length());
// Allocate a new map for the object.
Object* new_map = map()->Copy();
if (new_map->IsFailure()) return new_map;
// Every allocation has succeeded; the remaining statements only mutate.
Map::cast(new_map)->
set_instance_descriptors(DescriptorArray::cast(new_descriptors));
Map::cast(new_map)->
set_unused_property_fields(map()->unused_property_fields()-1);
set_map(Map::cast(new_map));
properties()->set(index, value);
} else {
ASSERT(map()->unused_property_fields() == 0);
// With more than kFastNofProperties slots in the properties array the
// object is normalized and the store is redone through SetProperty.
static const int kFastNofProperties = 20;
if (properties()->length() > kFastNofProperties) {
Object* obj = NormalizeProperties();
if (obj->IsFailure()) return obj;
return SetProperty(name, value, NONE);
}
// The properties array grows by kExtraFields slots.  One of them is
// filled right away, which leaves kExtraFields - 1 unused.
static const int kExtraFields = 5;
// Make room for more properties.
Object* values =
properties()->CopySize(properties()->length() + kExtraFields);
if (values->IsFailure()) return values;
// |values| is not yet installed on the object, so writing to it here
// does not change the object if the map allocation below fails.
FixedArray::cast(values)->set(index, value);
// Allocate a new map for the object.
Object* new_map = map()->Copy();
if (new_map->IsFailure()) return new_map;
Map::cast(new_map)->
set_instance_descriptors(DescriptorArray::cast(new_descriptors));
Map::cast(new_map)->
set_unused_property_fields(kExtraFields - 1);
set_map(Map::cast(new_map));
set_properties(FixedArray::cast(values));
}
}
return value;
}
// Add property in slow mode // Add property in slow mode
Object* JSObject::AddSlowProperty(String* name, Object* JSObject::AddSlowProperty(String* name,
Object* value, Object* value,
...@@ -1223,6 +1120,103 @@ Object* JSObject::SetPropertyPostInterceptor(String* name, ...@@ -1223,6 +1120,103 @@ Object* JSObject::SetPropertyPostInterceptor(String* name,
} }
// Overwrites the entry for |name| in the property dictionary with a NORMAL
// property holding |value| and carrying |attributes|.
//
// The index stored in the old entry's details is carried over to the new
// entry, except when the old entry is a transition, in which case index 0 is
// used.  (Previously the value computed for that purpose was never passed to
// the new details, so the transition case had no effect.)
// NOTE(review): index 0 presumably makes the dictionary pick a fresh index
// for the entry - confirm against Dictionary::SetOrAddStringEntry.
//
// Returns |value| on success, or the Failure object if the dictionary could
// not be updated.
Object* JSObject::ReplaceSlowProperty(String* name,
                                      Object* value,
                                      PropertyAttributes attributes) {
  Dictionary* dictionary = property_dictionary();
  PropertyDetails old_details =
      dictionary->DetailsAt(dictionary->FindStringEntry(name));
  int new_index = old_details.index();
  if (old_details.IsTransition()) new_index = 0;

  PropertyDetails new_details(attributes, NORMAL, new_index);
  Object* result =
      property_dictionary()->SetOrAddStringEntry(name, value, new_details);
  if (result->IsFailure()) return result;
  // The update may hand back a different dictionary; install it if so.
  if (property_dictionary() != result) {
    set_properties(Dictionary::cast(result));
  }
  return value;
}
// Converts the descriptor for |name| into a field holding |new_value| (see
// ConvertDescriptorToField) and then tries to record, on the map the object
// had before the conversion, a map transition to the object's new map.
// Returns whatever ConvertDescriptorToField returned; a failure to record the
// transition is deliberately not reported.
Object* JSObject::ConvertDescriptorToFieldAndMapTransition(
    String* name,
    Object* new_value,
    PropertyAttributes attributes) {
  Map* source_map = map();
  Object* outcome = ConvertDescriptorToField(name, new_value, attributes);

  // A failed conversion is reported as is.  Once the conversion has
  // succeeded, everything below is optional and must not return a failure.
  // An object that no longer has fast properties gets no transition.
  if (outcome->IsFailure() || !HasFastProperties()) return outcome;

  // Do not add transitions to the map of "new Object()".
  if (Top::context()->global_context()->object_function()->map() == map()) {
    return outcome;
  }

  MapTransitionDescriptor to_new_map(name, map(), attributes);
  Object* extended =
      source_map->instance_descriptors()->
          CopyInsert(&to_new_map, KEEP_TRANSITIONS);
  if (!extended->IsFailure()) {
    source_map->set_instance_descriptors(DescriptorArray::cast(extended));
  }
  return outcome;
}
// Replaces the descriptor for |name| with a field descriptor and stores
// |new_value| in the properties array.  When no unused field is left and the
// properties array already holds more than kMaxFastProperties entries, the
// object is normalized and the value is stored through ReplaceSlowProperty.
// Returns |new_value| on the fast path, the result of ReplaceSlowProperty on
// the slow path, or the Failure object of the allocation that failed.
Object* JSObject::ConvertDescriptorToField(String* name,
                                           Object* new_value,
                                           PropertyAttributes attributes) {
  const bool out_of_fields = (map()->unused_property_fields() == 0);
  if (out_of_fields && properties()->length() > kMaxFastProperties) {
    Object* normalized = NormalizeProperties();
    if (normalized->IsFailure()) return normalized;
    return ReplaceSlowProperty(name, new_value, attributes);
  }

  int index = map()->NextFreePropertyIndex();
  FieldDescriptor new_field(name, index, attributes);

  // Make a new DescriptorArray replacing an entry with FieldDescriptor.
  Object* maybe_descriptors =
      map()->instance_descriptors()->CopyInsert(&new_field,
                                                REMOVE_TRANSITIONS);
  if (maybe_descriptors->IsFailure()) return maybe_descriptors;
  DescriptorArray* field_descriptors =
      DescriptorArray::cast(maybe_descriptors);

  // Make a new map for the object.
  Object* maybe_map = map()->Copy();
  if (maybe_map->IsFailure()) return maybe_map;
  Map* new_map = Map::cast(maybe_map);
  new_map->set_instance_descriptors(field_descriptors);

  // Make a new, larger properties array only when no unused field is left.
  FixedArray* grown = NULL;
  int unused_after;
  if (out_of_fields) {
    Object* maybe_grown =
        properties()->CopySize(properties()->length() + kFieldsAdded);
    if (maybe_grown->IsFailure()) return maybe_grown;
    grown = FixedArray::cast(maybe_grown);
    unused_after = kFieldsAdded - 1;
  } else {
    unused_after = map()->unused_property_fields() - 1;
  }

  // All allocations are done; update pointers to commit the changes.
  new_map->set_unused_property_fields(unused_after);
  set_map(new_map);
  if (grown != NULL) {
    set_properties(grown);
  }
  properties()->set(index, new_value);
  return new_value;
}
Object* JSObject::SetPropertyWithInterceptor(String* name, Object* JSObject::SetPropertyWithInterceptor(String* name,
Object* value, Object* value,
PropertyAttributes attributes) { PropertyAttributes attributes) {
...@@ -1528,13 +1522,12 @@ Object* JSObject::SetProperty(LookupResult* result, ...@@ -1528,13 +1522,12 @@ Object* JSObject::SetProperty(LookupResult* result,
return AddFastPropertyUsingMap(result->GetTransitionMap(), return AddFastPropertyUsingMap(result->GetTransitionMap(),
name, name,
value); value);
} else {
return AddFastProperty(name, value, attributes);
} }
return ConvertDescriptorToField(name, value, attributes);
case CONSTANT_FUNCTION: case CONSTANT_FUNCTION:
if (value == result->GetConstantFunction()) return value; if (value == result->GetConstantFunction()) return value;
// Only replace the function if necessary. // Only replace the function if necessary.
return ReplaceConstantFunctionProperty(name, value); return ConvertDescriptorToFieldAndMapTransition(name, value, attributes);
case CALLBACKS: case CALLBACKS:
return SetPropertyWithCallback(result->GetCallbackObject(), return SetPropertyWithCallback(result->GetCallbackObject(),
name, name,
...@@ -1545,10 +1538,9 @@ Object* JSObject::SetProperty(LookupResult* result, ...@@ -1545,10 +1538,9 @@ Object* JSObject::SetProperty(LookupResult* result,
case CONSTANT_TRANSITION: case CONSTANT_TRANSITION:
// Replace with a MAP_TRANSITION to a new map with a FIELD, even // Replace with a MAP_TRANSITION to a new map with a FIELD, even
// if the value is a function. // if the value is a function.
// AddProperty has been extended to do this, in this case. return ConvertDescriptorToFieldAndMapTransition(name, value, attributes);
return AddFastProperty(name, value, attributes);
case NULL_DESCRIPTOR: case NULL_DESCRIPTOR:
UNREACHABLE(); return ConvertDescriptorToFieldAndMapTransition(name, value, attributes);
default: default:
UNREACHABLE(); UNREACHABLE();
} }
...@@ -1580,33 +1572,14 @@ Object* JSObject::IgnoreAttributesAndSetLocalProperty( ...@@ -1580,33 +1572,14 @@ Object* JSObject::IgnoreAttributesAndSetLocalProperty(
&& !Top::MayNamedAccess(this, name, v8::ACCESS_SET)) { && !Top::MayNamedAccess(this, name, v8::ACCESS_SET)) {
return SetPropertyWithFailedAccessCheck(result, name, value); return SetPropertyWithFailedAccessCheck(result, name, value);
} }
/* // Check for accessor in prototype chain removed here in clone.
REMOVED FROM CLONE
if (result->IsNotFound() || !result->IsProperty()) {
// We could not find a local property so let's check whether there is an
// accessor that wants to handle the property.
LookupResult accessor_result;
LookupCallbackSetterInPrototypes(name, &accessor_result);
if (accessor_result.IsValid()) {
return SetPropertyWithCallback(accessor_result.GetCallbackObject(),
name,
value,
accessor_result.holder());
}
}
*/
if (result->IsNotFound()) { if (result->IsNotFound()) {
return AddProperty(name, value, attributes); return AddProperty(name, value, attributes);
} }
if (!result->IsLoaded()) { if (!result->IsLoaded()) {
return SetLazyProperty(result, name, value, attributes); return SetLazyProperty(result, name, value, attributes);
} }
/* // Check of IsReadOnly removed from here in clone.
REMOVED FROM CLONE
if (result->IsReadOnly() && result->IsProperty()) return value;
*/
// This is a real property that is not read-only, or it is a
// transition or null descriptor and there are no setters in the prototypes.
switch (result->type()) { switch (result->type()) {
case NORMAL: case NORMAL:
property_dictionary()->ValueAtPut(result->GetDictionaryEntry(), value); property_dictionary()->ValueAtPut(result->GetDictionaryEntry(), value);
...@@ -1621,12 +1594,12 @@ Object* JSObject::IgnoreAttributesAndSetLocalProperty( ...@@ -1621,12 +1594,12 @@ Object* JSObject::IgnoreAttributesAndSetLocalProperty(
name, name,
value); value);
} else { } else {
return AddFastProperty(name, value, attributes); return ConvertDescriptorToField(name, value, attributes);
} }
case CONSTANT_FUNCTION: case CONSTANT_FUNCTION:
if (value == result->GetConstantFunction()) return value; if (value == result->GetConstantFunction()) return value;
// Only replace the function if necessary. // Only replace the function if necessary.
return ReplaceConstantFunctionProperty(name, value); return ConvertDescriptorToFieldAndMapTransition(name, value, attributes);
case CALLBACKS: case CALLBACKS:
return SetPropertyWithCallback(result->GetCallbackObject(), return SetPropertyWithCallback(result->GetCallbackObject(),
name, name,
...@@ -1637,10 +1610,9 @@ Object* JSObject::IgnoreAttributesAndSetLocalProperty( ...@@ -1637,10 +1610,9 @@ Object* JSObject::IgnoreAttributesAndSetLocalProperty(
case CONSTANT_TRANSITION: case CONSTANT_TRANSITION:
// Replace with a MAP_TRANSITION to a new map with a FIELD, even // Replace with a MAP_TRANSITION to a new map with a FIELD, even
// if the value is a function. // if the value is a function.
// AddProperty has been extended to do this, in this case. return ConvertDescriptorToFieldAndMapTransition(name, value, attributes);
return AddFastProperty(name, value, attributes);
case NULL_DESCRIPTOR: case NULL_DESCRIPTOR:
UNREACHABLE(); return ConvertDescriptorToFieldAndMapTransition(name, value, attributes);
default: default:
UNREACHABLE(); UNREACHABLE();
} }
...@@ -2663,14 +2635,6 @@ void DescriptorArray::SetEnumCache(FixedArray* bridge_storage, ...@@ -2663,14 +2635,6 @@ void DescriptorArray::SetEnumCache(FixedArray* bridge_storage,
} }
// Stores |value| into the content array slot that belongs to descriptor
// |descriptor_number|.  |value| must not be in new space.
void DescriptorArray::ReplaceConstantFunction(int descriptor_number,
                                              JSFunction* value) {
  ASSERT(!Heap::InNewSpace(value));
  const int value_slot = ToValueIndex(descriptor_number);
  fast_set(GetContentArray(), value_slot, value);
}
Object* DescriptorArray::CopyInsert(Descriptor* descriptor, Object* DescriptorArray::CopyInsert(Descriptor* descriptor,
TransitionFlag transition_flag) { TransitionFlag transition_flag) {
// Transitions are only kept when inserting another transition. // Transitions are only kept when inserting another transition.
...@@ -2771,69 +2735,6 @@ Object* DescriptorArray::CopyInsert(Descriptor* descriptor, ...@@ -2771,69 +2735,6 @@ Object* DescriptorArray::CopyInsert(Descriptor* descriptor,
} }
// Returns a copy of this descriptor array in which the entry for |name| is a
// field descriptor with the given |index| and |attributes|; the replaced
// entry's enumeration index is kept.  All other entries are copied unchanged.
// Returns a Failure object if an allocation or the symbol lookup fails.
Object* DescriptorArray::CopyReplace(String* name,
                                     int index,
                                     PropertyAttributes attributes) {
  // Allocate the new descriptor array.
  Object* copy = DescriptorArray::Allocate(number_of_descriptors());
  if (copy->IsFailure()) return copy;

  // Make sure only symbols are added to the instance descriptor.
  if (!name->IsSymbol()) {
    Object* symbol = Heap::LookupSymbol(name);
    if (symbol->IsFailure()) return symbol;
    name = String::cast(symbol);
  }

  DescriptorWriter writer(DescriptorArray::cast(copy));
  DescriptorReader reader(this);
  while (!reader.eos()) {
    if (!reader.Equals(name)) {
      writer.WriteFrom(&reader);
    } else {
      FieldDescriptor field(name, index, attributes);
      field.SetEnumerationIndex(reader.GetDetails().index());
      writer.Write(&field);
    }
    reader.advance();
  }

  // Copy the next enumeration index.
  DescriptorArray::cast(copy)->
      SetNextEnumerationIndex(NextEnumerationIndex());

  ASSERT(writer.eos());
  return copy;
}
// Returns a copy of this descriptor array, one entry shorter, that leaves
// out the entry whose key is |name|.  Returns a Failure object if the symbol
// lookup or the allocation fails.
Object* DescriptorArray::CopyRemove(String* name) {
  if (!name->IsSymbol()) {
    Object* symbol = Heap::LookupSymbol(name);
    if (symbol->IsFailure()) return symbol;
    name = String::cast(symbol);
  }
  ASSERT(name->IsSymbol());

  Object* allocation = Allocate(number_of_descriptors() - 1);
  if (allocation->IsFailure()) return allocation;
  DescriptorArray* trimmed = DescriptorArray::cast(allocation);

  // The copy continues the enumeration index sequence of this array.
  trimmed->SetNextEnumerationIndex(NextEnumerationIndex());

  // Write the old content and the descriptor information, skipping |name|.
  DescriptorWriter writer(trimmed);
  for (DescriptorReader reader(this); !reader.eos(); reader.advance()) {
    // Both keys are symbols, so object identity suffices.
    if (reader.GetKey() == name) continue;
    writer.WriteFrom(&reader);
  }
  ASSERT(writer.eos());

  return trimmed;
}
Object* DescriptorArray::RemoveTransitions() { Object* DescriptorArray::RemoveTransitions() {
// Remove all transitions. Return a copy of the array with all transitions // Remove all transitions. Return a copy of the array with all transitions
// removed, or a Failure object if the new array could not be allocated. // removed, or a Failure object if the new array could not be allocated.
......
...@@ -1304,9 +1304,26 @@ class JSObject: public HeapObject { ...@@ -1304,9 +1304,26 @@ class JSObject: public HeapObject {
JSFunction* function, JSFunction* function,
PropertyAttributes attributes); PropertyAttributes attributes);
// Replace a constant function property on a fast-case object. Object* ReplaceSlowProperty(String* name,
Object* ReplaceConstantFunctionProperty(String* name, Object* value,
Object* value); PropertyAttributes attributes);
// Converts a descriptor of any other type to a real field,
// backed by the properties array. Descriptors of visible
// types, such as CONSTANT_FUNCTION, keep their enumeration order.
// Converts the descriptor on the original object's map to a
// map transition, and the new field is on the object's new map.
Object* ConvertDescriptorToFieldAndMapTransition(
String* name,
Object* new_value,
PropertyAttributes attributes);
// Converts a descriptor of any other type to a real field,
// backed by the properties array. Descriptors of visible
// types, such as CONSTANT_FUNCTION, keep their enumeration order.
Object* ConvertDescriptorToField(String* name,
Object* new_value,
PropertyAttributes attributes);
// Add a property to a fast-case object. // Add a property to a fast-case object.
Object* AddFastProperty(String* name, Object* AddFastProperty(String* name,
...@@ -1378,6 +1395,10 @@ class JSObject: public HeapObject { ...@@ -1378,6 +1395,10 @@ class JSObject: public HeapObject {
static const uint32_t kMaxGap = 1024; static const uint32_t kMaxGap = 1024;
static const int kMaxFastElementsLength = 5000; static const int kMaxFastElementsLength = 5000;
static const int kMaxFastProperties = 8; static const int kMaxFastProperties = 8;
// When extending the backing storage for property values, we increase
// its size by more than the 1 entry necessary, so sequentially adding fields
// to the same object requires fewer allocations and copies.
static const int kFieldsAdded = 3;
// Layout description. // Layout description.
static const int kPropertiesOffset = HeapObject::kHeaderSize; static const int kPropertiesOffset = HeapObject::kHeaderSize;
...@@ -1563,7 +1584,6 @@ class DescriptorArray: public FixedArray { ...@@ -1563,7 +1584,6 @@ class DescriptorArray: public FixedArray {
inline void Get(int descriptor_number, Descriptor* desc); inline void Get(int descriptor_number, Descriptor* desc);
inline void Set(int descriptor_number, Descriptor* desc); inline void Set(int descriptor_number, Descriptor* desc);
void ReplaceConstantFunction(int descriptor_number, JSFunction* value);
// Copy the descriptor array, insert a new descriptor and optionally // Copy the descriptor array, insert a new descriptor and optionally
// remove map transitions. If the descriptor is already present, it is // remove map transitions. If the descriptor is already present, it is
...@@ -1573,20 +1593,6 @@ class DescriptorArray: public FixedArray { ...@@ -1573,20 +1593,6 @@ class DescriptorArray: public FixedArray {
// a transition, they must not be removed. All null descriptors are removed. // a transition, they must not be removed. All null descriptors are removed.
Object* CopyInsert(Descriptor* descriptor, TransitionFlag transition_flag); Object* CopyInsert(Descriptor* descriptor, TransitionFlag transition_flag);
// Makes a copy of the descriptor array with the descriptor with key name
// removed. If name is the empty string, the descriptor array is copied.
// Transitions are removed if TransitionFlag is REMOVE_TRANSITIONS.
// All null descriptors are removed.
Object* CopyRemove(TransitionFlag remove_transitions, String* name);
// Copy the descriptor array, replace the property index and attributes
// of the named property, but preserve its enumeration index.
Object* CopyReplace(String* name, int index, PropertyAttributes attributes);
// Copy the descriptor array, removing the property index and attributes
// of the named property.
Object* CopyRemove(String* name);
// Remove all transitions. Return a copy of the array with all transitions // Remove all transitions. Return a copy of the array with all transitions
// removed, or a Failure object if the new array could not be allocated. // removed, or a Failure object if the new array could not be allocated.
Object* RemoveTransitions(); Object* RemoveTransitions();
......
...@@ -66,15 +66,13 @@ const int JSRegExpErrorInternal = -4; ...@@ -66,15 +66,13 @@ const int JSRegExpErrorInternal = -4;
typedef void* malloc_t(size_t size); typedef void* malloc_t(size_t size);
typedef void free_t(void* address); typedef void free_t(void* address);
template <typename Char> JSRegExp* jsRegExpCompile(const UChar* pattern, int patternLength,
JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength,
JSRegExpIgnoreCaseOption, JSRegExpMultilineOption, JSRegExpIgnoreCaseOption, JSRegExpMultilineOption,
unsigned* numSubpatterns, const char** errorMessage, unsigned* numSubpatterns, const char** errorMessage,
malloc_t* allocate_function, free_t* free_function); malloc_t* allocate_function, free_t* free_function);
template <typename Char>
int jsRegExpExecute(const JSRegExp*, int jsRegExpExecute(const JSRegExp*,
const Char* subject, int subjectLength, int startOffset, const UChar* subject, int subjectLength, int startOffset,
int* offsetsVector, int offsetsVectorLength); int* offsetsVector, int offsetsVectorLength);
void jsRegExpFree(JSRegExp*); void jsRegExpFree(JSRegExp*);
......
...@@ -147,8 +147,7 @@ struct CompileData { ...@@ -147,8 +147,7 @@ struct CompileData {
/* Definitions to allow mutual recursion */ /* Definitions to allow mutual recursion */
template <typename Char> static bool compileBracket(int, int*, unsigned char**, const UChar**, const UChar*, ErrorCode*, int, int*, int*, CompileData&);
static bool compileBracket(int, int*, unsigned char**, const Char**, const Char*, ErrorCode*, int, int*, int*, CompileData&);
static bool bracketIsAnchored(const unsigned char* code); static bool bracketIsAnchored(const unsigned char* code);
static bool bracketNeedsLineStart(const unsigned char* code, unsigned captureMap, unsigned backrefMap); static bool bracketNeedsLineStart(const unsigned char* code, unsigned captureMap, unsigned backrefMap);
static int bracketFindFirstAssertedCharacter(const unsigned char* code, bool inassert); static int bracketFindFirstAssertedCharacter(const unsigned char* code, bool inassert);
...@@ -175,10 +174,9 @@ Returns: zero or positive => a data character ...@@ -175,10 +174,9 @@ Returns: zero or positive => a data character
on error, errorptr is set on error, errorptr is set
*/ */
template <typename Char> static int checkEscape(const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorcodeptr, int bracount, bool isclass)
static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* errorcodeptr, int bracount, bool isclass)
{ {
const Char* ptr = *ptrptr + 1; const UChar* ptr = *ptrptr + 1;
/* If backslash is at the end of the pattern, it's an error. */ /* If backslash is at the end of the pattern, it's an error. */
if (ptr == patternEnd) { if (ptr == patternEnd) {
...@@ -186,13 +184,13 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e ...@@ -186,13 +184,13 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e
*ptrptr = ptr; *ptrptr = ptr;
return 0; return 0;
} }
int c = *ptr; int c = *ptr;
/* Non-alphamerics are literals. For digits or letters, do an initial lookup in /* Non-alphamerics are literals. For digits or letters, do an initial lookup in
a table. A non-zero result is something that can be returned immediately. a table. A non-zero result is something that can be returned immediately.
Otherwise further processing may be required. */ Otherwise further processing may be required. */
if (c < '0' || c > 'z') { /* Not alphameric */ if (c < '0' || c > 'z') { /* Not alphameric */
} else if (int escapeValue = escapes[c - '0']) { } else if (int escapeValue = escapes[c - '0']) {
c = escapeValue; c = escapeValue;
...@@ -203,7 +201,7 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e ...@@ -203,7 +201,7 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e
c = 'B'; /* and \B is a capital B in a class (in browsers event though ECMAScript 15.10.2.19 says it raises an error) */ c = 'B'; /* and \B is a capital B in a class (in browsers event though ECMAScript 15.10.2.19 says it raises an error) */
} }
/* Escapes that need further processing, or are illegal. */ /* Escapes that need further processing, or are illegal. */
} else { } else {
switch (c) { switch (c) {
case '1': case '1':
...@@ -219,9 +217,9 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e ...@@ -219,9 +217,9 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e
unless there are insufficient brackets, in which case they are octal unless there are insufficient brackets, in which case they are octal
escape sequences. Those sequences end on the first non-octal character escape sequences. Those sequences end on the first non-octal character
or when we overflow 0-255, whichever comes first. */ or when we overflow 0-255, whichever comes first. */
if (!isclass) { if (!isclass) {
const Char* oldptr = ptr; const UChar* oldptr = ptr;
c -= '0'; c -= '0';
while ((ptr + 1 < patternEnd) && isASCIIDigit(ptr[1]) && c <= bracount) while ((ptr + 1 < patternEnd) && isASCIIDigit(ptr[1]) && c <= bracount)
c = c * 10 + *(++ptr) - '0'; c = c * 10 + *(++ptr) - '0';
...@@ -231,10 +229,10 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e ...@@ -231,10 +229,10 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e
} }
ptr = oldptr; /* Put the pointer back and fall through */ ptr = oldptr; /* Put the pointer back and fall through */
} }
/* Handle an octal number following \. If the first digit is 8 or 9, /* Handle an octal number following \. If the first digit is 8 or 9,
this is not octal. */ this is not octal. */
if ((c = *ptr) >= '8') if ((c = *ptr) >= '8')
break; break;
...@@ -298,14 +296,14 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e ...@@ -298,14 +296,14 @@ static int checkEscape(const Char** ptrptr, const Char* patternEnd, ErrorCode* e
return 0; return 0;
} }
c = *ptr; c = *ptr;
/* A letter is upper-cased; then the 0x40 bit is flipped. This coding /* A letter is upper-cased; then the 0x40 bit is flipped. This coding
is ASCII-specific, but then the whole concept of \cx is ASCII-specific. */ is ASCII-specific, but then the whole concept of \cx is ASCII-specific. */
c = toASCIIUpper(c) ^ 0x40; c = toASCIIUpper(c) ^ 0x40;
break; break;
} }
} }
*ptrptr = ptr; *ptrptr = ptr;
return c; return c;
} }
...@@ -325,8 +323,7 @@ Arguments: ...@@ -325,8 +323,7 @@ Arguments:
Returns: true or false Returns: true or false
*/ */
template <typename Char> static bool isCountedRepeat(const UChar* p, const UChar* patternEnd)
static bool isCountedRepeat(const Char* p, const Char* patternEnd)
{ {
if (p >= patternEnd || !isASCIIDigit(*p)) if (p >= patternEnd || !isASCIIDigit(*p))
return false; return false;
...@@ -335,18 +332,18 @@ static bool isCountedRepeat(const Char* p, const Char* patternEnd) ...@@ -335,18 +332,18 @@ static bool isCountedRepeat(const Char* p, const Char* patternEnd)
p++; p++;
if (p < patternEnd && *p == '}') if (p < patternEnd && *p == '}')
return true; return true;
if (p >= patternEnd || *p++ != ',') if (p >= patternEnd || *p++ != ',')
return false; return false;
if (p < patternEnd && *p == '}') if (p < patternEnd && *p == '}')
return true; return true;
if (p >= patternEnd || !isASCIIDigit(*p)) if (p >= patternEnd || !isASCIIDigit(*p))
return false; return false;
p++; p++;
while (p < patternEnd && isASCIIDigit(*p)) while (p < patternEnd && isASCIIDigit(*p))
p++; p++;
return (p < patternEnd && *p == '}'); return (p < patternEnd && *p == '}');
} }
...@@ -369,25 +366,24 @@ Returns: pointer to '}' on success; ...@@ -369,25 +366,24 @@ Returns: pointer to '}' on success;
current ptr on error, with errorcodeptr set non-zero current ptr on error, with errorcodeptr set non-zero
*/ */
template <typename Char> static const UChar* readRepeatCounts(const UChar* p, int* minp, int* maxp, ErrorCode* errorcodeptr)
static const Char* readRepeatCounts(const Char* p, int* minp, int* maxp, ErrorCode* errorcodeptr)
{ {
int min = 0; int min = 0;
int max = -1; int max = -1;
/* Read the minimum value and do a paranoid check: a negative value indicates /* Read the minimum value and do a paranoid check: a negative value indicates
an integer overflow. */ an integer overflow. */
while (isASCIIDigit(*p)) while (isASCIIDigit(*p))
min = min * 10 + *p++ - '0'; min = min * 10 + *p++ - '0';
if (min < 0 || min > 65535) { if (min < 0 || min > 65535) {
*errorcodeptr = ERR5; *errorcodeptr = ERR5;
return p; return p;
} }
/* Read the maximum value if there is one, and again do a paranoid check on its size. /* Read the maximum value if there is one, and again do a paranoid check on its size.
Also, max must not be less than min. */ Also, max must not be less than min. */
if (*p == '}') if (*p == '}')
max = min; max = min;
else { else {
...@@ -405,10 +401,10 @@ static const Char* readRepeatCounts(const Char* p, int* minp, int* maxp, ErrorCo ...@@ -405,10 +401,10 @@ static const Char* readRepeatCounts(const Char* p, int* minp, int* maxp, ErrorCo
} }
} }
} }
/* Fill in the required variables, and pass back the pointer to the terminating /* Fill in the required variables, and pass back the pointer to the terminating
'}'. */ '}'. */
*minp = min; *minp = min;
*maxp = max; *maxp = max;
return p; return p;
...@@ -476,27 +472,27 @@ Yield: true when range returned; false when no more ...@@ -476,27 +472,27 @@ Yield: true when range returned; false when no more
static bool getOthercaseRange(int* cptr, int d, int* ocptr, int* odptr) static bool getOthercaseRange(int* cptr, int d, int* ocptr, int* odptr)
{ {
int c, othercase = 0; int c, othercase = 0;
for (c = *cptr; c <= d; c++) { for (c = *cptr; c <= d; c++) {
if ((othercase = kjs_pcre_ucp_othercase(c)) >= 0) if ((othercase = kjs_pcre_ucp_othercase(c)) >= 0)
break; break;
} }
if (c > d) if (c > d)
return false; return false;
*ocptr = othercase; *ocptr = othercase;
int next = othercase + 1; int next = othercase + 1;
for (++c; c <= d; c++) { for (++c; c <= d; c++) {
if (kjs_pcre_ucp_othercase(c) != next) if (kjs_pcre_ucp_othercase(c) != next)
break; break;
next++; next++;
} }
*odptr = next - 1; *odptr = next - 1;
*cptr = c; *cptr = c;
return true; return true;
} }
...@@ -506,11 +502,11 @@ static bool getOthercaseRange(int* cptr, int d, int* ocptr, int* odptr) ...@@ -506,11 +502,11 @@ static bool getOthercaseRange(int* cptr, int d, int* ocptr, int* odptr)
/* This function takes an integer value in the range 0 - 0x7fffffff /* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 0 to 6 bytes. and encodes it as a UTF-8 character in 0 to 6 bytes.
Arguments: Arguments:
cvalue the character value cvalue the character value
buffer pointer to buffer for result - at least 6 bytes long buffer pointer to buffer for result - at least 6 bytes long
Returns: number of characters placed in the buffer Returns: number of characters placed in the buffer
*/ */
...@@ -549,16 +545,14 @@ Returns: true on success ...@@ -549,16 +545,14 @@ Returns: true on success
false, with *errorcodeptr set non-zero on error false, with *errorcodeptr set non-zero on error
*/ */
template <typename Char> static inline bool safelyCheckNextChar(const UChar* ptr, const UChar* patternEnd, UChar expected)
static inline bool safelyCheckNextChar(const Char* ptr, const Char* patternEnd, char expected)
{ {
return ((ptr + 1 < patternEnd) && ptr[1] == expected); return ((ptr + 1 < patternEnd) && ptr[1] == expected);
} }
template <typename Char>
static bool static bool
compileBranch(int options, int* brackets, unsigned char** codeptr, compileBranch(int options, int* brackets, unsigned char** codeptr,
const Char** ptrptr, const Char* patternEnd, ErrorCode* errorcodeptr, int *firstbyteptr, const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorcodeptr, int *firstbyteptr,
int* reqbyteptr, CompileData& cd) int* reqbyteptr, CompileData& cd)
{ {
int repeat_type, op_type; int repeat_type, op_type;
...@@ -569,39 +563,39 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -569,39 +563,39 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
unsigned char* code = *codeptr; unsigned char* code = *codeptr;
unsigned char* tempcode; unsigned char* tempcode;
bool groupsetfirstbyte = false; bool groupsetfirstbyte = false;
const Char* ptr = *ptrptr; const UChar* ptr = *ptrptr;
const Char* tempptr; const UChar* tempptr;
unsigned char* previous = NULL; unsigned char* previous = NULL;
unsigned char classbits[32]; unsigned char classbits[32];
bool class_utf8; bool class_utf8;
unsigned char* class_utf8data; unsigned char* class_utf8data;
unsigned char utf8_char[6]; unsigned char utf8_char[6];
/* Initialize no first byte, no required byte. REQ_UNSET means "no char /* Initialize no first byte, no required byte. REQ_UNSET means "no char
matching encountered yet". It gets changed to REQ_NONE if we hit something that matching encountered yet". It gets changed to REQ_NONE if we hit something that
matches a non-fixed char first char; reqbyte just remains unset if we never matches a non-fixed char first char; reqbyte just remains unset if we never
find one. find one.
When we hit a repeat whose minimum is zero, we may have to adjust these values When we hit a repeat whose minimum is zero, we may have to adjust these values
to take the zero repeat into account. This is implemented by setting them to to take the zero repeat into account. This is implemented by setting them to
zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual
item types that can be repeated set these backoff variables appropriately. */ item types that can be repeated set these backoff variables appropriately. */
int firstbyte = REQ_UNSET; int firstbyte = REQ_UNSET;
int reqbyte = REQ_UNSET; int reqbyte = REQ_UNSET;
int zeroreqbyte = REQ_UNSET; int zeroreqbyte = REQ_UNSET;
int zerofirstbyte = REQ_UNSET; int zerofirstbyte = REQ_UNSET;
/* The variable req_caseopt contains either the REQ_IGNORE_CASE value or zero, /* The variable req_caseopt contains either the REQ_IGNORE_CASE value or zero,
according to the current setting of the ignores-case flag. REQ_IGNORE_CASE is a bit according to the current setting of the ignores-case flag. REQ_IGNORE_CASE is a bit
value > 255. It is added into the firstbyte or reqbyte variables to record the value > 255. It is added into the firstbyte or reqbyte variables to record the
case status of the value. This is used only for ASCII characters. */ case status of the value. This is used only for ASCII characters. */
int req_caseopt = (options & IgnoreCaseOption) ? REQ_IGNORE_CASE : 0; int req_caseopt = (options & IgnoreCaseOption) ? REQ_IGNORE_CASE : 0;
/* Switch on next character until the end of the branch */ /* Switch on next character until the end of the branch */
for (;; ptr++) { for (;; ptr++) {
bool negate_class; bool negate_class;
bool should_flip_negation; /* If a negative special such as \S is used, we should negate the whole class to properly support Unicode. */ bool should_flip_negation; /* If a negative special such as \S is used, we should negate the whole class to properly support Unicode. */
...@@ -612,19 +606,19 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -612,19 +606,19 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
int subfirstbyte; int subfirstbyte;
int mclength; int mclength;
unsigned char mcbuffer[8]; unsigned char mcbuffer[8];
/* Next byte in the pattern */ /* Next byte in the pattern */
c = ptr < patternEnd ? *ptr : 0; c = ptr < patternEnd ? *ptr : 0;
/* Fill in length of a previous callout, except when the next thing is /* Fill in length of a previous callout, except when the next thing is
a quantifier. */ a quantifier. */
bool is_quantifier = c == '*' || c == '+' || c == '?' || (c == '{' && isCountedRepeat(ptr + 1, patternEnd)); bool is_quantifier = c == '*' || c == '+' || c == '?' || (c == '{' && isCountedRepeat(ptr + 1, patternEnd));
switch (c) { switch (c) {
/* The branch terminates at end of string, |, or ). */ /* The branch terminates at end of string, |, or ). */
case 0: case 0:
if (ptr < patternEnd) if (ptr < patternEnd)
goto NORMAL_CHAR; goto NORMAL_CHAR;
...@@ -636,7 +630,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -636,7 +630,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
*codeptr = code; *codeptr = code;
*ptrptr = ptr; *ptrptr = ptr;
return true; return true;
/* Handle single-character metacharacters. In multiline mode, ^ disables /* Handle single-character metacharacters. In multiline mode, ^ disables
the setting of any following char as a first character. */ the setting of any following char as a first character. */
...@@ -669,26 +663,26 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -669,26 +663,26 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
previous = code; previous = code;
*code++ = OP_NOT_NEWLINE; *code++ = OP_NOT_NEWLINE;
break; break;
/* Character classes. If the included characters are all < 256, we build a /* Character classes. If the included characters are all < 256, we build a
32-byte bitmap of the permitted characters, except in the special case 32-byte bitmap of the permitted characters, except in the special case
where there is only one such character. For negated classes, we build the where there is only one such character. For negated classes, we build the
map as usual, then invert it at the end. However, we use a different opcode map as usual, then invert it at the end. However, we use a different opcode
so that data characters > 255 can be handled correctly. so that data characters > 255 can be handled correctly.
If the class contains characters outside the 0-255 range, a different If the class contains characters outside the 0-255 range, a different
opcode is compiled. It may optionally have a bit map for characters < 256, opcode is compiled. It may optionally have a bit map for characters < 256,
but those above are explicitly listed afterwards. A flag byte tells but those above are explicitly listed afterwards. A flag byte tells
whether the bitmap is present, and whether this is a negated class or not. whether the bitmap is present, and whether this is a negated class or not.
*/ */
case '[': { case '[': {
previous = code; previous = code;
should_flip_negation = false; should_flip_negation = false;
/* PCRE supports POSIX class stuff inside a class. Perl gives an error if /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
they are encountered at the top level, so we'll do that too. */ they are encountered at the top level, so we'll do that too. */
/* If the first character is '^', set the negation flag and skip it. */ /* If the first character is '^', set the negation flag and skip it. */
if (ptr + 1 >= patternEnd) { if (ptr + 1 >= patternEnd) {
...@@ -701,24 +695,24 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -701,24 +695,24 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
++ptr; ++ptr;
} else } else
negate_class = false; negate_class = false;
/* Keep a count of chars with values < 256 so that we can optimize the case /* Keep a count of chars with values < 256 so that we can optimize the case
of just a single character (as long as it's < 256). For higher valued UTF-8 of just a single character (as long as it's < 256). For higher valued UTF-8
characters, we don't yet do any optimization. */ characters, we don't yet do any optimization. */
class_charcount = 0; class_charcount = 0;
class_lastchar = -1; class_lastchar = -1;
class_utf8 = false; /* No chars >= 256 */ class_utf8 = false; /* No chars >= 256 */
class_utf8data = code + LINK_SIZE + 34; /* For UTF-8 items */ class_utf8data = code + LINK_SIZE + 34; /* For UTF-8 items */
/* Initialize the 32-char bit map to all zeros. We have to build the /* Initialize the 32-char bit map to all zeros. We have to build the
map in a temporary bit of store, in case the class contains only 1 map in a temporary bit of store, in case the class contains only 1
character (< 256), because in that case the compiled code doesn't use the character (< 256), because in that case the compiled code doesn't use the
bit map. */ bit map. */
memset(classbits, 0, 32 * sizeof(unsigned char)); memset(classbits, 0, 32 * sizeof(unsigned char));
/* Process characters until ] is reached. The first pass /* Process characters until ] is reached. The first pass
through the regex checked the overall syntax, so we don't need to be very through the regex checked the overall syntax, so we don't need to be very
strict here. At the start of the loop, c contains the first byte of the strict here. At the start of the loop, c contains the first byte of the
...@@ -732,7 +726,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -732,7 +726,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
it marks a word boundary. Other escapes have preset maps ready to it marks a word boundary. Other escapes have preset maps ready to
or into the one we are building. We assume they have more than one or into the one we are building. We assume they have more than one
character in them, so set class_charcount bigger than one. */ character in them, so set class_charcount bigger than one. */
if (c == '\\') { if (c == '\\') {
c = checkEscape(&ptr, patternEnd, errorcodeptr, cd.numCapturingBrackets, true); c = checkEscape(&ptr, patternEnd, errorcodeptr, cd.numCapturingBrackets, true);
if (c < 0) { if (c < 0) {
...@@ -742,92 +736,92 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -742,92 +736,92 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
for (c = 0; c < 32; c++) for (c = 0; c < 32; c++)
classbits[c] |= classBitmapForChar(c + cbit_digit); classbits[c] |= classBitmapForChar(c + cbit_digit);
continue; continue;
case ESC_D: case ESC_D:
should_flip_negation = true; should_flip_negation = true;
for (c = 0; c < 32; c++) for (c = 0; c < 32; c++)
classbits[c] |= ~classBitmapForChar(c + cbit_digit); classbits[c] |= ~classBitmapForChar(c + cbit_digit);
continue; continue;
case ESC_w: case ESC_w:
for (c = 0; c < 32; c++) for (c = 0; c < 32; c++)
classbits[c] |= classBitmapForChar(c + cbit_word); classbits[c] |= classBitmapForChar(c + cbit_word);
continue; continue;
case ESC_W: case ESC_W:
should_flip_negation = true; should_flip_negation = true;
for (c = 0; c < 32; c++) for (c = 0; c < 32; c++)
classbits[c] |= ~classBitmapForChar(c + cbit_word); classbits[c] |= ~classBitmapForChar(c + cbit_word);
continue; continue;
case ESC_s: case ESC_s:
for (c = 0; c < 32; c++) for (c = 0; c < 32; c++)
classbits[c] |= classBitmapForChar(c + cbit_space); classbits[c] |= classBitmapForChar(c + cbit_space);
continue; continue;
case ESC_S: case ESC_S:
should_flip_negation = true; should_flip_negation = true;
for (c = 0; c < 32; c++) for (c = 0; c < 32; c++)
classbits[c] |= ~classBitmapForChar(c + cbit_space); classbits[c] |= ~classBitmapForChar(c + cbit_space);
continue; continue;
/* Unrecognized escapes are faulted if PCRE is running in its /* Unrecognized escapes are faulted if PCRE is running in its
strict mode. By default, for compatibility with Perl, they are strict mode. By default, for compatibility with Perl, they are
treated as literals. */ treated as literals. */
default: default:
c = *ptr; /* The final character */ c = *ptr; /* The final character */
class_charcount -= 2; /* Undo the default count from above */ class_charcount -= 2; /* Undo the default count from above */
} }
} }
/* Fall through if we have a single character (c >= 0). This may be /* Fall through if we have a single character (c >= 0). This may be
> 256 in UTF-8 mode. */ > 256 in UTF-8 mode. */
} /* End of backslash handling */ } /* End of backslash handling */
/* A single character may be followed by '-' to form a range. However, /* A single character may be followed by '-' to form a range. However,
Perl does not permit ']' to be the end of the range. A '-' character Perl does not permit ']' to be the end of the range. A '-' character
here is treated as a literal. */ here is treated as a literal. */
if ((ptr + 2 < patternEnd) && ptr[1] == '-' && ptr[2] != ']') { if ((ptr + 2 < patternEnd) && ptr[1] == '-' && ptr[2] != ']') {
ptr += 2; ptr += 2;
int d = *ptr; int d = *ptr;
/* The second part of a range can be a single-character escape, but /* The second part of a range can be a single-character escape, but
not any of the other escapes. Perl 5.6 treats a hyphen as a literal not any of the other escapes. Perl 5.6 treats a hyphen as a literal
in such circumstances. */ in such circumstances. */
if (d == '\\') { if (d == '\\') {
const Char* oldptr = ptr; const UChar* oldptr = ptr;
d = checkEscape(&ptr, patternEnd, errorcodeptr, cd.numCapturingBrackets, true); d = checkEscape(&ptr, patternEnd, errorcodeptr, cd.numCapturingBrackets, true);
/* \X is literal X; any other special means the '-' was literal */ /* \X is literal X; any other special means the '-' was literal */
if (d < 0) { if (d < 0) {
ptr = oldptr - 2; ptr = oldptr - 2;
goto LONE_SINGLE_CHARACTER; /* A few lines below */ goto LONE_SINGLE_CHARACTER; /* A few lines below */
} }
} }
/* The check that the two values are in the correct order happens in /* The check that the two values are in the correct order happens in
the pre-pass. Optimize one-character ranges */ the pre-pass. Optimize one-character ranges */
if (d == c) if (d == c)
goto LONE_SINGLE_CHARACTER; /* A few lines below */ goto LONE_SINGLE_CHARACTER; /* A few lines below */
/* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
matching, we have to use an XCLASS with extra data items. Caseless matching, we have to use an XCLASS with extra data items. Caseless
matching for characters > 127 is available only if UCP support is matching for characters > 127 is available only if UCP support is
available. */ available. */
if ((d > 255 || ((options & IgnoreCaseOption) && d > 127))) { if ((d > 255 || ((options & IgnoreCaseOption) && d > 127))) {
class_utf8 = true; class_utf8 = true;
/* With UCP support, we can find the other case equivalents of /* With UCP support, we can find the other case equivalents of
the relevant characters. There may be several ranges. Optimize how the relevant characters. There may be several ranges. Optimize how
they fit with the basic range. */ they fit with the basic range. */
if (options & IgnoreCaseOption) { if (options & IgnoreCaseOption) {
int occ, ocd; int occ, ocd;
int cc = c; int cc = c;
...@@ -835,7 +829,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -835,7 +829,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
while (getOthercaseRange(&cc, origd, &occ, &ocd)) { while (getOthercaseRange(&cc, origd, &occ, &ocd)) {
if (occ >= c && ocd <= d) if (occ >= c && ocd <= d)
continue; /* Skip embedded ranges */ continue; /* Skip embedded ranges */
if (occ < c && ocd >= c - 1) /* Extend the basic range */ if (occ < c && ocd >= c - 1) /* Extend the basic range */
{ /* if there is overlap, */ { /* if there is overlap, */
c = occ; /* noting that if occ < c */ c = occ; /* noting that if occ < c */
...@@ -846,7 +840,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -846,7 +840,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
d = ocd; d = ocd;
continue; continue;
} }
if (occ == ocd) if (occ == ocd)
*class_utf8data++ = XCL_SINGLE; *class_utf8data++ = XCL_SINGLE;
else { else {
...@@ -856,25 +850,25 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -856,25 +850,25 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
class_utf8data += encodeUTF8(ocd, class_utf8data); class_utf8data += encodeUTF8(ocd, class_utf8data);
} }
} }
/* Now record the original range, possibly modified for UCP caseless /* Now record the original range, possibly modified for UCP caseless
overlapping ranges. */ overlapping ranges. */
*class_utf8data++ = XCL_RANGE; *class_utf8data++ = XCL_RANGE;
class_utf8data += encodeUTF8(c, class_utf8data); class_utf8data += encodeUTF8(c, class_utf8data);
class_utf8data += encodeUTF8(d, class_utf8data); class_utf8data += encodeUTF8(d, class_utf8data);
/* With UCP support, we are done. Without UCP support, there is no /* With UCP support, we are done. Without UCP support, there is no
caseless matching for UTF-8 characters > 127; we can use the bit map caseless matching for UTF-8 characters > 127; we can use the bit map
for the smaller ones. */ for the smaller ones. */
continue; /* With next character in the class */ continue; /* With next character in the class */
} }
/* We use the bit map for all cases when not in UTF-8 mode; else /* We use the bit map for all cases when not in UTF-8 mode; else
ranges that lie entirely within 0-127 when there is UCP support; else ranges that lie entirely within 0-127 when there is UCP support; else
for partial ranges without UCP support. */ for partial ranges without UCP support. */
for (; c <= d; c++) { for (; c <= d; c++) {
classbits[c/8] |= (1 << (c&7)); classbits[c/8] |= (1 << (c&7));
if (options & IgnoreCaseOption) { if (options & IgnoreCaseOption) {
...@@ -884,23 +878,23 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -884,23 +878,23 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
class_charcount++; /* in case a one-char range */ class_charcount++; /* in case a one-char range */
class_lastchar = c; class_lastchar = c;
} }
continue; /* Go get the next char in the class */ continue; /* Go get the next char in the class */
} }
/* Handle a lone single character - we can get here for a normal /* Handle a lone single character - we can get here for a normal
non-escape char, or after \ that introduces a single character or for an non-escape char, or after \ that introduces a single character or for an
apparent range that isn't. */ apparent range that isn't. */
LONE_SINGLE_CHARACTER: LONE_SINGLE_CHARACTER:
/* Handle a character that cannot go in the bit map */ /* Handle a character that cannot go in the bit map */
if ((c > 255 || ((options & IgnoreCaseOption) && c > 127))) { if ((c > 255 || ((options & IgnoreCaseOption) && c > 127))) {
class_utf8 = true; class_utf8 = true;
*class_utf8data++ = XCL_SINGLE; *class_utf8data++ = XCL_SINGLE;
class_utf8data += encodeUTF8(c, class_utf8data); class_utf8data += encodeUTF8(c, class_utf8data);
if (options & IgnoreCaseOption) { if (options & IgnoreCaseOption) {
int othercase; int othercase;
if ((othercase = kjs_pcre_ucp_othercase(c)) >= 0) { if ((othercase = kjs_pcre_ucp_othercase(c)) >= 0) {
...@@ -919,26 +913,26 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -919,26 +913,26 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
class_lastchar = c; class_lastchar = c;
} }
} }
/* If class_charcount is 1, we saw precisely one character whose value is /* If class_charcount is 1, we saw precisely one character whose value is
less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we
can optimize the negative case only if there were no characters >= 128 can optimize the negative case only if there were no characters >= 128
because OP_NOT and the related opcodes like OP_NOTSTAR operate on because OP_NOT and the related opcodes like OP_NOTSTAR operate on
single-bytes only. This is an historical hangover. Maybe one day we can single-bytes only. This is an historical hangover. Maybe one day we can
tidy these opcodes to handle multi-byte characters. tidy these opcodes to handle multi-byte characters.
The optimization throws away the bit map. We turn the item into a The optimization throws away the bit map. We turn the item into a
1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note 1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note
that OP_NOT does not support multibyte characters. In the positive case, it that OP_NOT does not support multibyte characters. In the positive case, it
can cause firstbyte to be set. Otherwise, there can be no first char if can cause firstbyte to be set. Otherwise, there can be no first char if
this item is first, whatever repeat count may follow. In the case of this item is first, whatever repeat count may follow. In the case of
reqbyte, save the previous value for reinstating. */ reqbyte, save the previous value for reinstating. */
if (class_charcount == 1 && (!class_utf8 && (!negate_class || class_lastchar < 128))) { if (class_charcount == 1 && (!class_utf8 && (!negate_class || class_lastchar < 128))) {
zeroreqbyte = reqbyte; zeroreqbyte = reqbyte;
/* The OP_NOT opcode works on one-byte characters only. */ /* The OP_NOT opcode works on one-byte characters only. */
if (negate_class) { if (negate_class) {
if (firstbyte == REQ_UNSET) if (firstbyte == REQ_UNSET)
firstbyte = REQ_NONE; firstbyte = REQ_NONE;
...@@ -947,61 +941,61 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -947,61 +941,61 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
*code++ = class_lastchar; *code++ = class_lastchar;
break; break;
} }
/* For a single, positive character, get the value into c, and /* For a single, positive character, get the value into c, and
then we can handle this with the normal one-character code. */ then we can handle this with the normal one-character code. */
c = class_lastchar; c = class_lastchar;
goto NORMAL_CHAR; goto NORMAL_CHAR;
} /* End of 1-char optimization */ } /* End of 1-char optimization */
/* The general case - not the one-char optimization. If this is the first /* The general case - not the one-char optimization. If this is the first
thing in the branch, there can be no first char setting, whatever the thing in the branch, there can be no first char setting, whatever the
repeat count. Any reqbyte setting must remain unchanged after any kind of repeat count. Any reqbyte setting must remain unchanged after any kind of
repeat. */ repeat. */
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
zerofirstbyte = firstbyte; zerofirstbyte = firstbyte;
zeroreqbyte = reqbyte; zeroreqbyte = reqbyte;
/* If there are characters with values > 255, we have to compile an /* If there are characters with values > 255, we have to compile an
extended class, with its own opcode. If there are no characters < 256, extended class, with its own opcode. If there are no characters < 256,
we can omit the bitmap. */ we can omit the bitmap. */
if (class_utf8 && !should_flip_negation) { if (class_utf8 && !should_flip_negation) {
*class_utf8data++ = XCL_END; /* Marks the end of extra data */ *class_utf8data++ = XCL_END; /* Marks the end of extra data */
*code++ = OP_XCLASS; *code++ = OP_XCLASS;
code += LINK_SIZE; code += LINK_SIZE;
*code = negate_class? XCL_NOT : 0; *code = negate_class? XCL_NOT : 0;
/* If the map is required, install it, and move on to the end of /* If the map is required, install it, and move on to the end of
the extra data */ the extra data */
if (class_charcount > 0) { if (class_charcount > 0) {
*code++ |= XCL_MAP; *code++ |= XCL_MAP;
memcpy(code, classbits, 32); memcpy(code, classbits, 32);
code = class_utf8data; code = class_utf8data;
} }
/* If the map is not required, slide down the extra data. */ /* If the map is not required, slide down the extra data. */
else { else {
int len = class_utf8data - (code + 33); int len = class_utf8data - (code + 33);
memmove(code + 1, code + 33, len); memmove(code + 1, code + 33, len);
code += len + 1; code += len + 1;
} }
/* Now fill in the complete length of the item */ /* Now fill in the complete length of the item */
putLinkValue(previous + 1, code - previous); putLinkValue(previous + 1, code - previous);
break; /* End of class handling */ break; /* End of class handling */
} }
/* If there are no characters > 255, negate the 32-byte map if necessary, /* If there are no characters > 255, negate the 32-byte map if necessary,
and copy it into the code vector. If this is the first thing in the branch, and copy it into the code vector. If this is the first thing in the branch,
there can be no first char setting, whatever the repeat count. Any reqbyte there can be no first char setting, whatever the repeat count. Any reqbyte
setting must remain unchanged after any kind of repeat. */ setting must remain unchanged after any kind of repeat. */
*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
if (negate_class) if (negate_class)
for (c = 0; c < 32; c++) for (c = 0; c < 32; c++)
...@@ -1011,7 +1005,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1011,7 +1005,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
code += 32; code += 32;
break; break;
} }
/* Various kinds of repeat; '{' is not necessarily a quantifier, but this /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
has been tested above. */ has been tested above. */
...@@ -1022,68 +1016,68 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1022,68 +1016,68 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
if (*errorcodeptr) if (*errorcodeptr)
goto FAILED; goto FAILED;
goto REPEAT; goto REPEAT;
case '*': case '*':
repeat_min = 0; repeat_min = 0;
repeat_max = -1; repeat_max = -1;
goto REPEAT; goto REPEAT;
case '+': case '+':
repeat_min = 1; repeat_min = 1;
repeat_max = -1; repeat_max = -1;
goto REPEAT; goto REPEAT;
case '?': case '?':
repeat_min = 0; repeat_min = 0;
repeat_max = 1; repeat_max = 1;
REPEAT: REPEAT:
if (!previous) { if (!previous) {
*errorcodeptr = ERR9; *errorcodeptr = ERR9;
goto FAILED; goto FAILED;
} }
if (repeat_min == 0) { if (repeat_min == 0) {
firstbyte = zerofirstbyte; /* Adjust for zero repeat */ firstbyte = zerofirstbyte; /* Adjust for zero repeat */
reqbyte = zeroreqbyte; /* Ditto */ reqbyte = zeroreqbyte; /* Ditto */
} }
/* Remember whether this is a variable length repeat */ /* Remember whether this is a variable length repeat */
reqvary = (repeat_min == repeat_max) ? 0 : REQ_VARY; reqvary = (repeat_min == repeat_max) ? 0 : REQ_VARY;
op_type = 0; /* Default single-char op codes */ op_type = 0; /* Default single-char op codes */
/* Save start of previous item, in case we have to move it up to make space /* Save start of previous item, in case we have to move it up to make space
for an inserted OP_ONCE for the additional '+' extension. */ for an inserted OP_ONCE for the additional '+' extension. */
/* FIXME: Probably don't need this because we don't use OP_ONCE. */ /* FIXME: Probably don't need this because we don't use OP_ONCE. */
tempcode = previous; tempcode = previous;
/* If the next character is '+', we have a possessive quantifier. This /* If the next character is '+', we have a possessive quantifier. This
implies greediness, whatever the setting of the PCRE_UNGREEDY option. implies greediness, whatever the setting of the PCRE_UNGREEDY option.
If the next character is '?' this is a minimizing repeat, by default, If the next character is '?' this is a minimizing repeat, by default,
but if PCRE_UNGREEDY is set, it works the other way round. We change the but if PCRE_UNGREEDY is set, it works the other way round. We change the
repeat type to the non-default. */ repeat type to the non-default. */
if (safelyCheckNextChar(ptr, patternEnd, '?')) { if (safelyCheckNextChar(ptr, patternEnd, '?')) {
repeat_type = 1; repeat_type = 1;
ptr++; ptr++;
} else } else
repeat_type = 0; repeat_type = 0;
/* If previous was a character match, abolish the item and generate a /* If previous was a character match, abolish the item and generate a
repeat item instead. If a char item has a minimum of more than one, ensure repeat item instead. If a char item has a minimum of more than one, ensure
that it is set in reqbyte - it might not be if a sequence such as x{3} is that it is set in reqbyte - it might not be if a sequence such as x{3} is
the first thing in a branch because the x will have gone into firstbyte the first thing in a branch because the x will have gone into firstbyte
instead. */ instead. */
if (*previous == OP_CHAR || *previous == OP_CHAR_IGNORING_CASE) { if (*previous == OP_CHAR || *previous == OP_CHAR_IGNORING_CASE) {
/* Deal with UTF-8 characters that take up more than one byte. It's /* Deal with UTF-8 characters that take up more than one byte. It's
easier to write this out separately than try to macrify it. Use c to easier to write this out separately than try to macrify it. Use c to
hold the length of the character in bytes, plus 0x80 to flag that it's a hold the length of the character in bytes, plus 0x80 to flag that it's a
length rather than a small character. */ length rather than a small character. */
if (code[-1] & 0x80) { if (code[-1] & 0x80) {
unsigned char *lastchar = code - 1; unsigned char *lastchar = code - 1;
while((*lastchar & 0xc0) == 0x80) while((*lastchar & 0xc0) == 0x80)
...@@ -1097,56 +1091,56 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1097,56 +1091,56 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
if (repeat_min > 1) if (repeat_min > 1)
reqbyte = c | req_caseopt | cd.req_varyopt; reqbyte = c | req_caseopt | cd.req_varyopt;
} }
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
} }
else if (*previous == OP_ASCII_CHAR || *previous == OP_ASCII_LETTER_IGNORING_CASE) { else if (*previous == OP_ASCII_CHAR || *previous == OP_ASCII_LETTER_IGNORING_CASE) {
c = previous[1]; c = previous[1];
if (repeat_min > 1) if (repeat_min > 1)
reqbyte = c | req_caseopt | cd.req_varyopt; reqbyte = c | req_caseopt | cd.req_varyopt;
goto OUTPUT_SINGLE_REPEAT; goto OUTPUT_SINGLE_REPEAT;
} }
/* If previous was a single negated character ([^a] or similar), we use /* If previous was a single negated character ([^a] or similar), we use
one of the special opcodes, replacing it. The code is shared with single- one of the special opcodes, replacing it. The code is shared with single-
character repeats by setting op_type to add a suitable offset into character repeats by setting op_type to add a suitable offset into
repeat_type. OP_NOT is currently used only for single-byte chars. */ repeat_type. OP_NOT is currently used only for single-byte chars. */
else if (*previous == OP_NOT) { else if (*previous == OP_NOT) {
op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */ op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */
c = previous[1]; c = previous[1];
goto OUTPUT_SINGLE_REPEAT; goto OUTPUT_SINGLE_REPEAT;
} }
/* If previous was a character type match (\d or similar), abolish it and /* If previous was a character type match (\d or similar), abolish it and
create a suitable repeat item. The code is shared with single-character create a suitable repeat item. The code is shared with single-character
repeats by setting op_type to add a suitable offset into repeat_type. */ repeats by setting op_type to add a suitable offset into repeat_type. */
else if (*previous <= OP_NOT_NEWLINE) { else if (*previous <= OP_NOT_NEWLINE) {
op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
c = *previous; c = *previous;
OUTPUT_SINGLE_REPEAT: OUTPUT_SINGLE_REPEAT:
int prop_type = -1; int prop_type = -1;
int prop_value = -1; int prop_value = -1;
unsigned char* oldcode = code; unsigned char* oldcode = code;
code = previous; /* Usually overwrite previous item */ code = previous; /* Usually overwrite previous item */
/* If the maximum is zero then the minimum must also be zero; Perl allows /* If the maximum is zero then the minimum must also be zero; Perl allows
this case, so we do too - by simply omitting the item altogether. */ this case, so we do too - by simply omitting the item altogether. */
if (repeat_max == 0) if (repeat_max == 0)
goto END_REPEAT; goto END_REPEAT;
/* Combine the op_type with the repeat_type */ /* Combine the op_type with the repeat_type */
repeat_type += op_type; repeat_type += op_type;
/* A minimum of zero is handled either as the special case * or ?, or as /* A minimum of zero is handled either as the special case * or ?, or as
an UPTO, with the maximum given. */ an UPTO, with the maximum given. */
if (repeat_min == 0) { if (repeat_min == 0) {
if (repeat_max == -1) if (repeat_max == -1)
*code++ = OP_STAR + repeat_type; *code++ = OP_STAR + repeat_type;
...@@ -1157,12 +1151,12 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1157,12 +1151,12 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
put2ByteValueAndAdvance(code, repeat_max); put2ByteValueAndAdvance(code, repeat_max);
} }
} }
/* A repeat minimum of 1 is optimized into some special cases. If the /* A repeat minimum of 1 is optimized into some special cases. If the
maximum is unlimited, we use OP_PLUS. Otherwise, the original item is maximum is unlimited, we use OP_PLUS. Otherwise, the original item is
left in place and, if the maximum is greater than 1, we use OP_UPTO with left in place and, if the maximum is greater than 1, we use OP_UPTO with
one less than the maximum. */ one less than the maximum. */
else if (repeat_min == 1) { else if (repeat_min == 1) {
if (repeat_max == -1) if (repeat_max == -1)
*code++ = OP_PLUS + repeat_type; *code++ = OP_PLUS + repeat_type;
...@@ -1174,20 +1168,20 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1174,20 +1168,20 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
put2ByteValueAndAdvance(code, repeat_max - 1); put2ByteValueAndAdvance(code, repeat_max - 1);
} }
} }
/* The case {n,n} is just an EXACT, while the general case {n,m} is /* The case {n,n} is just an EXACT, while the general case {n,m} is
handled as an EXACT followed by an UPTO. */ handled as an EXACT followed by an UPTO. */
else { else {
*code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
put2ByteValueAndAdvance(code, repeat_min); put2ByteValueAndAdvance(code, repeat_min);
/* If the maximum is unlimited, insert an OP_STAR. Before doing so, /* If the maximum is unlimited, insert an OP_STAR. Before doing so,
we have to insert the character for the previous code. For a repeated we have to insert the character for the previous code. For a repeated
Unicode property match, there are two extra bytes that define the Unicode property match, there are two extra bytes that define the
required property. In UTF-8 mode, long characters have their length in required property. In UTF-8 mode, long characters have their length in
c, with the 0x80 bit as a flag. */ c, with the 0x80 bit as a flag. */
if (repeat_max < 0) { if (repeat_max < 0) {
if (c >= 128) { if (c >= 128) {
memcpy(code, utf8_char, c & 7); memcpy(code, utf8_char, c & 7);
...@@ -1201,10 +1195,10 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1201,10 +1195,10 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
} }
*code++ = OP_STAR + repeat_type; *code++ = OP_STAR + repeat_type;
} }
/* Else insert an UPTO if the max is greater than the min, again /* Else insert an UPTO if the max is greater than the min, again
preceded by the character, for the previously inserted code. */ preceded by the character, for the previously inserted code. */
else if (repeat_max != repeat_min) { else if (repeat_max != repeat_min) {
if (c >= 128) { if (c >= 128) {
memcpy(code, utf8_char, c & 7); memcpy(code, utf8_char, c & 7);
...@@ -1220,27 +1214,27 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1220,27 +1214,27 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
put2ByteValueAndAdvance(code, repeat_max); put2ByteValueAndAdvance(code, repeat_max);
} }
} }
/* The character or character type itself comes last in all cases. */ /* The character or character type itself comes last in all cases. */
if (c >= 128) { if (c >= 128) {
memcpy(code, utf8_char, c & 7); memcpy(code, utf8_char, c & 7);
code += c & 7; code += c & 7;
} else } else
*code++ = c; *code++ = c;
/* For a repeated Unicode property match, there are two extra bytes that /* For a repeated Unicode property match, there are two extra bytes that
define the required property. */ define the required property. */
if (prop_type >= 0) { if (prop_type >= 0) {
*code++ = prop_type; *code++ = prop_type;
*code++ = prop_value; *code++ = prop_value;
} }
} }
/* If previous was a character class or a back reference, we put the repeat /* If previous was a character class or a back reference, we put the repeat
stuff after it, but just skip the item if the repeat was {0,0}. */ stuff after it, but just skip the item if the repeat was {0,0}. */
else if (*previous == OP_CLASS || else if (*previous == OP_CLASS ||
*previous == OP_NCLASS || *previous == OP_NCLASS ||
*previous == OP_XCLASS || *previous == OP_XCLASS ||
...@@ -1250,7 +1244,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1250,7 +1244,7 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
code = previous; code = previous;
goto END_REPEAT; goto END_REPEAT;
} }
if (repeat_min == 0 && repeat_max == -1) if (repeat_min == 0 && repeat_max == -1)
*code++ = OP_CRSTAR + repeat_type; *code++ = OP_CRSTAR + repeat_type;
else if (repeat_min == 1 && repeat_max == -1) else if (repeat_min == 1 && repeat_max == -1)
...@@ -1265,86 +1259,86 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1265,86 +1259,86 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
put2ByteValueAndAdvance(code, repeat_max); put2ByteValueAndAdvance(code, repeat_max);
} }
} }
/* If previous was a bracket group, we may have to replicate it in certain /* If previous was a bracket group, we may have to replicate it in certain
cases. */ cases. */
else if (*previous >= OP_BRA) { else if (*previous >= OP_BRA) {
int ketoffset = 0; int ketoffset = 0;
int len = code - previous; int len = code - previous;
unsigned char* bralink = NULL; unsigned char* bralink = NULL;
/* If the maximum repeat count is unlimited, find the end of the bracket /* If the maximum repeat count is unlimited, find the end of the bracket
by scanning through from the start, and compute the offset back to it by scanning through from the start, and compute the offset back to it
from the current code pointer. There may be an OP_OPT setting following from the current code pointer. There may be an OP_OPT setting following
the final KET, so we can't find the end just by going back from the code the final KET, so we can't find the end just by going back from the code
pointer. */ pointer. */
if (repeat_max == -1) { if (repeat_max == -1) {
const unsigned char* ket = previous; const unsigned char* ket = previous;
advanceToEndOfBracket(ket); advanceToEndOfBracket(ket);
ketoffset = code - ket; ketoffset = code - ket;
} }
/* The case of a zero minimum is special because of the need to stick /* The case of a zero minimum is special because of the need to stick
OP_BRAZERO in front of it, and because the group appears once in the OP_BRAZERO in front of it, and because the group appears once in the
data, whereas in other cases it appears the minimum number of times. For data, whereas in other cases it appears the minimum number of times. For
this reason, it is simplest to treat this case separately, as otherwise this reason, it is simplest to treat this case separately, as otherwise
the code gets far too messy. There are several special subcases when the the code gets far too messy. There are several special subcases when the
minimum is zero. */ minimum is zero. */
if (repeat_min == 0) { if (repeat_min == 0) {
/* If the maximum is also zero, we just omit the group from the output /* If the maximum is also zero, we just omit the group from the output
altogether. */ altogether. */
if (repeat_max == 0) { if (repeat_max == 0) {
code = previous; code = previous;
goto END_REPEAT; goto END_REPEAT;
} }
/* If the maximum is 1 or unlimited, we just have to stick in the /* If the maximum is 1 or unlimited, we just have to stick in the
BRAZERO and do no more at this point. However, we do need to adjust BRAZERO and do no more at this point. However, we do need to adjust
any OP_RECURSE calls inside the group that refer to the group itself or any OP_RECURSE calls inside the group that refer to the group itself or
any internal group, because the offset is from the start of the whole any internal group, because the offset is from the start of the whole
regex. Temporarily terminate the pattern while doing this. */ regex. Temporarily terminate the pattern while doing this. */
if (repeat_max <= 1) { if (repeat_max <= 1) {
*code = OP_END; *code = OP_END;
memmove(previous+1, previous, len); memmove(previous+1, previous, len);
code++; code++;
*previous++ = OP_BRAZERO + repeat_type; *previous++ = OP_BRAZERO + repeat_type;
} }
/* If the maximum is greater than 1 and limited, we have to replicate /* If the maximum is greater than 1 and limited, we have to replicate
in a nested fashion, sticking OP_BRAZERO before each set of brackets. in a nested fashion, sticking OP_BRAZERO before each set of brackets.
The first one has to be handled carefully because it's the original The first one has to be handled carefully because it's the original
copy, which has to be moved up. The remainder can be handled by code copy, which has to be moved up. The remainder can be handled by code
that is common with the non-zero minimum case below. We have to that is common with the non-zero minimum case below. We have to
adjust the value of repeat_max, since one less copy is required. */ adjust the value of repeat_max, since one less copy is required. */
else { else {
*code = OP_END; *code = OP_END;
memmove(previous + 2 + LINK_SIZE, previous, len); memmove(previous + 2 + LINK_SIZE, previous, len);
code += 2 + LINK_SIZE; code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type; *previous++ = OP_BRAZERO + repeat_type;
*previous++ = OP_BRA; *previous++ = OP_BRA;
/* We chain together the bracket offset fields that have to be /* We chain together the bracket offset fields that have to be
filled in later when the ends of the brackets are reached. */ filled in later when the ends of the brackets are reached. */
int offset = (!bralink) ? 0 : previous - bralink; int offset = (!bralink) ? 0 : previous - bralink;
bralink = previous; bralink = previous;
putLinkValueAllowZeroAndAdvance(previous, offset); putLinkValueAllowZeroAndAdvance(previous, offset);
} }
repeat_max--; repeat_max--;
} }
/* If the minimum is greater than zero, replicate the group as many /* If the minimum is greater than zero, replicate the group as many
times as necessary, and adjust the maximum to the number of subsequent times as necessary, and adjust the maximum to the number of subsequent
copies that we need. If we set a first char from the group, and didn't copies that we need. If we set a first char from the group, and didn't
set a required char, copy the latter from the former. */ set a required char, copy the latter from the former. */
else { else {
if (repeat_min > 1) { if (repeat_min > 1) {
if (groupsetfirstbyte && reqbyte < 0) if (groupsetfirstbyte && reqbyte < 0)
...@@ -1357,34 +1351,34 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1357,34 +1351,34 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
if (repeat_max > 0) if (repeat_max > 0)
repeat_max -= repeat_min; repeat_max -= repeat_min;
} }
/* This code is common to both the zero and non-zero minimum cases. If /* This code is common to both the zero and non-zero minimum cases. If
the maximum is limited, it replicates the group in a nested fashion, the maximum is limited, it replicates the group in a nested fashion,
remembering the bracket starts on a stack. In the case of a zero minimum, remembering the bracket starts on a stack. In the case of a zero minimum,
the first one was set up above. In all cases the repeat_max now specifies the first one was set up above. In all cases the repeat_max now specifies
the number of additional copies needed. */ the number of additional copies needed. */
if (repeat_max >= 0) { if (repeat_max >= 0) {
for (int i = repeat_max - 1; i >= 0; i--) { for (int i = repeat_max - 1; i >= 0; i--) {
*code++ = OP_BRAZERO + repeat_type; *code++ = OP_BRAZERO + repeat_type;
/* All but the final copy start a new nesting, maintaining the /* All but the final copy start a new nesting, maintaining the
chain of brackets outstanding. */ chain of brackets outstanding. */
if (i != 0) { if (i != 0) {
*code++ = OP_BRA; *code++ = OP_BRA;
int offset = (!bralink) ? 0 : code - bralink; int offset = (!bralink) ? 0 : code - bralink;
bralink = code; bralink = code;
putLinkValueAllowZeroAndAdvance(code, offset); putLinkValueAllowZeroAndAdvance(code, offset);
} }
memcpy(code, previous, len); memcpy(code, previous, len);
code += len; code += len;
} }
/* Now chain through the pending brackets, and fill in their length /* Now chain through the pending brackets, and fill in their length
fields (which are holding the chain links pro tem). */ fields (which are holding the chain links pro tem). */
while (bralink) { while (bralink) {
int offset = code - bralink + 1; int offset = code - bralink + 1;
unsigned char* bra = code - offset; unsigned char* bra = code - offset;
...@@ -1395,71 +1389,71 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1395,71 +1389,71 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
putLinkValue(bra + 1, offset); putLinkValue(bra + 1, offset);
} }
} }
/* If the maximum is unlimited, set a repeater in the final copy. We /* If the maximum is unlimited, set a repeater in the final copy. We
can't just offset backwards from the current code point, because we can't just offset backwards from the current code point, because we
don't know if there's been an options resetting after the ket. The don't know if there's been an options resetting after the ket. The
correct offset was computed above. */ correct offset was computed above. */
else else
code[-ketoffset] = OP_KETRMAX + repeat_type; code[-ketoffset] = OP_KETRMAX + repeat_type;
} }
/* Else there's some kind of shambles */ /* Else there's some kind of shambles */
else { else {
*errorcodeptr = ERR11; *errorcodeptr = ERR11;
goto FAILED; goto FAILED;
} }
/* In all cases we no longer have a previous item. We also set the /* In all cases we no longer have a previous item. We also set the
"follows varying string" flag for subsequently encountered reqbytes if "follows varying string" flag for subsequently encountered reqbytes if
it isn't already set and we have just passed a varying length item. */ it isn't already set and we have just passed a varying length item. */
END_REPEAT: END_REPEAT:
previous = NULL; previous = NULL;
cd.req_varyopt |= reqvary; cd.req_varyopt |= reqvary;
break; break;
/* Start of nested bracket sub-expression, or comment or lookahead or /* Start of nested bracket sub-expression, or comment or lookahead or
lookbehind or option setting or condition. First deal with special things lookbehind or option setting or condition. First deal with special things
that can come after a bracket; all are introduced by ?, and the appearance that can come after a bracket; all are introduced by ?, and the appearance
of any of them means that this is not a referencing group. They were of any of them means that this is not a referencing group. They were
checked for validity in the first pass over the string, so we don't have to checked for validity in the first pass over the string, so we don't have to
check for syntax errors here. */ check for syntax errors here. */
case '(': case '(':
skipbytes = 0; skipbytes = 0;
if (*(++ptr) == '?') { if (*(++ptr) == '?') {
switch (*(++ptr)) { switch (*(++ptr)) {
case ':': /* Non-extracting bracket */ case ':': /* Non-extracting bracket */
bravalue = OP_BRA; bravalue = OP_BRA;
ptr++; ptr++;
break; break;
case '=': /* Positive lookahead */ case '=': /* Positive lookahead */
bravalue = OP_ASSERT; bravalue = OP_ASSERT;
ptr++; ptr++;
break; break;
case '!': /* Negative lookahead */ case '!': /* Negative lookahead */
bravalue = OP_ASSERT_NOT; bravalue = OP_ASSERT_NOT;
ptr++; ptr++;
break; break;
/* Character after (? not specially recognized */ /* Character after (? not specially recognized */
default: default:
*errorcodeptr = ERR12; *errorcodeptr = ERR12;
goto FAILED; goto FAILED;
} }
} }
/* Else we have a referencing group; adjust the opcode. If the bracket /* Else we have a referencing group; adjust the opcode. If the bracket
number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and
arrange for the true number to follow later, in an OP_BRANUMBER item. */ arrange for the true number to follow later, in an OP_BRANUMBER item. */
else { else {
if (++(*brackets) > EXTRACT_BASIC_MAX) { if (++(*brackets) > EXTRACT_BASIC_MAX) {
bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1; bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;
...@@ -1470,17 +1464,17 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1470,17 +1464,17 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
else else
bravalue = OP_BRA + *brackets; bravalue = OP_BRA + *brackets;
} }
/* Process nested bracketed re. Assertions may not be repeated, but other /* Process nested bracketed re. Assertions may not be repeated, but other
kinds can be. We copy code into a separate variable in order to be able kinds can be. We copy code into a separate variable in order to be able
to pass its address because some compilers complain otherwise. Pass in a to pass its address because some compilers complain otherwise. Pass in a
new setting for the ims options if they have changed. */ new setting for the ims options if they have changed. */
previous = (bravalue >= OP_BRAZERO) ? code : 0; previous = (bravalue >= OP_BRAZERO) ? code : 0;
*code = bravalue; *code = bravalue;
tempcode = code; tempcode = code;
tempreqvary = cd.req_varyopt; /* Save value before bracket */ tempreqvary = cd.req_varyopt; /* Save value before bracket */
if (!compileBracket( if (!compileBracket(
options, options,
brackets, /* Extracting bracket count */ brackets, /* Extracting bracket count */
...@@ -1493,29 +1487,29 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1493,29 +1487,29 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
&subreqbyte, /* For possible last char */ &subreqbyte, /* For possible last char */
cd)) /* Tables block */ cd)) /* Tables block */
goto FAILED; goto FAILED;
/* At the end of compiling, code is still pointing to the start of the /* At the end of compiling, code is still pointing to the start of the
group, while tempcode has been updated to point past the end of the group group, while tempcode has been updated to point past the end of the group
and any option resetting that may follow it. The pattern pointer (ptr) and any option resetting that may follow it. The pattern pointer (ptr)
is on the bracket. */ is on the bracket. */
/* Handle updating of the required and first characters. Update for normal /* Handle updating of the required and first characters. Update for normal
brackets of all kinds, and conditions with two branches (see code above). brackets of all kinds, and conditions with two branches (see code above).
If the bracket is followed by a quantifier with zero repeat, we have to If the bracket is followed by a quantifier with zero repeat, we have to
back off. Hence the definition of zeroreqbyte and zerofirstbyte outside the back off. Hence the definition of zeroreqbyte and zerofirstbyte outside the
main loop so that they can be accessed for the back off. */ main loop so that they can be accessed for the back off. */
zeroreqbyte = reqbyte; zeroreqbyte = reqbyte;
zerofirstbyte = firstbyte; zerofirstbyte = firstbyte;
groupsetfirstbyte = false; groupsetfirstbyte = false;
if (bravalue >= OP_BRA) { if (bravalue >= OP_BRA) {
/* If we have not yet set a firstbyte in this branch, take it from the /* If we have not yet set a firstbyte in this branch, take it from the
subpattern, remembering that it was set here so that a repeat of more subpattern, remembering that it was set here so that a repeat of more
than one can replicate it as reqbyte if necessary. If the subpattern has than one can replicate it as reqbyte if necessary. If the subpattern has
no firstbyte, set "none" for the whole branch. In both cases, a zero no firstbyte, set "none" for the whole branch. In both cases, a zero
repeat forces firstbyte to "none". */ repeat forces firstbyte to "none". */
if (firstbyte == REQ_UNSET) { if (firstbyte == REQ_UNSET) {
if (subfirstbyte >= 0) { if (subfirstbyte >= 0) {
firstbyte = subfirstbyte; firstbyte = subfirstbyte;
...@@ -1525,21 +1519,21 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1525,21 +1519,21 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
firstbyte = REQ_NONE; firstbyte = REQ_NONE;
zerofirstbyte = REQ_NONE; zerofirstbyte = REQ_NONE;
} }
/* If firstbyte was previously set, convert the subpattern's firstbyte /* If firstbyte was previously set, convert the subpattern's firstbyte
into reqbyte if there wasn't one, using the vary flag that was in into reqbyte if there wasn't one, using the vary flag that was in
existence beforehand. */ existence beforehand. */
else if (subfirstbyte >= 0 && subreqbyte < 0) else if (subfirstbyte >= 0 && subreqbyte < 0)
subreqbyte = subfirstbyte | tempreqvary; subreqbyte = subfirstbyte | tempreqvary;
/* If the subpattern set a required byte (or set a first byte that isn't /* If the subpattern set a required byte (or set a first byte that isn't
really the first byte - see above), set it. */ really the first byte - see above), set it. */
if (subreqbyte >= 0) if (subreqbyte >= 0)
reqbyte = subreqbyte; reqbyte = subreqbyte;
} }
/* For a forward assertion, we take the reqbyte, if set. This can be /* For a forward assertion, we take the reqbyte, if set. This can be
helpful if the pattern that follows the assertion doesn't set a different helpful if the pattern that follows the assertion doesn't set a different
char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte
...@@ -1547,83 +1541,83 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1547,83 +1541,83 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead
of a firstbyte. This is overcome by a scan at the end if there's no of a firstbyte. This is overcome by a scan at the end if there's no
firstbyte, looking for an asserted first char. */ firstbyte, looking for an asserted first char. */
else if (bravalue == OP_ASSERT && subreqbyte >= 0) else if (bravalue == OP_ASSERT && subreqbyte >= 0)
reqbyte = subreqbyte; reqbyte = subreqbyte;
/* Now update the main code pointer to the end of the group. */ /* Now update the main code pointer to the end of the group. */
code = tempcode; code = tempcode;
/* Error if hit end of pattern */ /* Error if hit end of pattern */
if (ptr >= patternEnd || *ptr != ')') { if (ptr >= patternEnd || *ptr != ')') {
*errorcodeptr = ERR14; *errorcodeptr = ERR14;
goto FAILED; goto FAILED;
} }
break; break;
/* Check \ for being a real metacharacter; if not, fall through and handle /* Check \ for being a real metacharacter; if not, fall through and handle
it as a data character at the start of a string. Escape items are checked it as a data character at the start of a string. Escape items are checked
for validity in the pre-compiling pass. */ for validity in the pre-compiling pass. */
case '\\': case '\\':
tempptr = ptr; tempptr = ptr;
c = checkEscape(&ptr, patternEnd, errorcodeptr, cd.numCapturingBrackets, false); c = checkEscape(&ptr, patternEnd, errorcodeptr, cd.numCapturingBrackets, false);
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
are arranged to be the negation of the corresponding OP_values. For the are arranged to be the negation of the corresponding OP_values. For the
back references, the values are ESC_REF plus the reference number. Only back references, the values are ESC_REF plus the reference number. Only
back references and those types that consume a character may be repeated. back references and those types that consume a character may be repeated.
We can test for values between ESC_b and ESC_w for the latter; this may We can test for values between ESC_b and ESC_w for the latter; this may
have to change if any new ones are ever created. */ have to change if any new ones are ever created. */
if (c < 0) { if (c < 0) {
/* For metasequences that actually match a character, we disable the /* For metasequences that actually match a character, we disable the
setting of a first character if it hasn't already been set. */ setting of a first character if it hasn't already been set. */
if (firstbyte == REQ_UNSET && -c > ESC_b && -c <= ESC_w) if (firstbyte == REQ_UNSET && -c > ESC_b && -c <= ESC_w)
firstbyte = REQ_NONE; firstbyte = REQ_NONE;
/* Set values to reset to if this is followed by a zero repeat. */ /* Set values to reset to if this is followed by a zero repeat. */
zerofirstbyte = firstbyte; zerofirstbyte = firstbyte;
zeroreqbyte = reqbyte; zeroreqbyte = reqbyte;
/* Back references are handled specially */ /* Back references are handled specially */
if (-c >= ESC_REF) { if (-c >= ESC_REF) {
int number = -c - ESC_REF; int number = -c - ESC_REF;
previous = code; previous = code;
*code++ = OP_REF; *code++ = OP_REF;
put2ByteValueAndAdvance(code, number); put2ByteValueAndAdvance(code, number);
} }
/* For the rest, we can obtain the OP value by negating the escape /* For the rest, we can obtain the OP value by negating the escape
value */ value */
else { else {
previous = (-c > ESC_b && -c <= ESC_w) ? code : NULL; previous = (-c > ESC_b && -c <= ESC_w) ? code : NULL;
*code++ = -c; *code++ = -c;
} }
continue; continue;
} }
/* Fall through. */ /* Fall through. */
/* Handle a literal character. It is guaranteed not to be whitespace or # /* Handle a literal character. It is guaranteed not to be whitespace or #
when the extended flag is set. If we are in UTF-8 mode, it may be a when the extended flag is set. If we are in UTF-8 mode, it may be a
multi-byte literal character. */ multi-byte literal character. */
default: default:
NORMAL_CHAR: NORMAL_CHAR:
previous = code; previous = code;
if (c < 128) { if (c < 128) {
mclength = 1; mclength = 1;
mcbuffer[0] = c; mcbuffer[0] = c;
if ((options & IgnoreCaseOption) && (c | 0x20) >= 'a' && (c | 0x20) <= 'z') { if ((options & IgnoreCaseOption) && (c | 0x20) >= 'a' && (c | 0x20) <= 'z') {
*code++ = OP_ASCII_LETTER_IGNORING_CASE; *code++ = OP_ASCII_LETTER_IGNORING_CASE;
*code++ = c | 0x20; *code++ = c | 0x20;
...@@ -1633,24 +1627,24 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1633,24 +1627,24 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
} }
} else { } else {
mclength = encodeUTF8(c, mcbuffer); mclength = encodeUTF8(c, mcbuffer);
*code++ = (options & IgnoreCaseOption) ? OP_CHAR_IGNORING_CASE : OP_CHAR; *code++ = (options & IgnoreCaseOption) ? OP_CHAR_IGNORING_CASE : OP_CHAR;
for (c = 0; c < mclength; c++) for (c = 0; c < mclength; c++)
*code++ = mcbuffer[c]; *code++ = mcbuffer[c];
} }
/* Set the first and required bytes appropriately. If no previous first /* Set the first and required bytes appropriately. If no previous first
byte, set it from this character, but revert to none on a zero repeat. byte, set it from this character, but revert to none on a zero repeat.
Otherwise, leave the firstbyte value alone, and don't change it on a zero Otherwise, leave the firstbyte value alone, and don't change it on a zero
repeat. */ repeat. */
if (firstbyte == REQ_UNSET) { if (firstbyte == REQ_UNSET) {
zerofirstbyte = REQ_NONE; zerofirstbyte = REQ_NONE;
zeroreqbyte = reqbyte; zeroreqbyte = reqbyte;
/* If the character is more than one byte long, we can set firstbyte /* If the character is more than one byte long, we can set firstbyte
only if it is not to be matched caselessly. */ only if it is not to be matched caselessly. */
if (mclength == 1 || req_caseopt == 0) { if (mclength == 1 || req_caseopt == 0) {
firstbyte = mcbuffer[0] | req_caseopt; firstbyte = mcbuffer[0] | req_caseopt;
if (mclength != 1) if (mclength != 1)
...@@ -1659,25 +1653,25 @@ compileBranch(int options, int* brackets, unsigned char** codeptr, ...@@ -1659,25 +1653,25 @@ compileBranch(int options, int* brackets, unsigned char** codeptr,
else else
firstbyte = reqbyte = REQ_NONE; firstbyte = reqbyte = REQ_NONE;
} }
/* firstbyte was previously set; we can set reqbyte only if the length is /* firstbyte was previously set; we can set reqbyte only if the length is
1 or the matching is caseful. */ 1 or the matching is caseful. */
else { else {
zerofirstbyte = firstbyte; zerofirstbyte = firstbyte;
zeroreqbyte = reqbyte; zeroreqbyte = reqbyte;
if (mclength == 1 || req_caseopt == 0) if (mclength == 1 || req_caseopt == 0)
reqbyte = code[-1] | req_caseopt | cd.req_varyopt; reqbyte = code[-1] | req_caseopt | cd.req_varyopt;
} }
break; /* End of literal character handling */ break; /* End of literal character handling */
} }
} /* end of big loop */ } /* end of big loop */
/* Control never reaches here by falling through, only by a goto for all the /* Control never reaches here by falling through, only by a goto for all the
error states. Pass back the position in the pattern so that it can be displayed error states. Pass back the position in the pattern so that it can be displayed
to the user for diagnosing the error. */ to the user for diagnosing the error. */
FAILED: FAILED:
*ptrptr = ptr; *ptrptr = ptr;
return false; return false;
...@@ -1709,29 +1703,28 @@ Argument: ...@@ -1709,29 +1703,28 @@ Argument:
Returns: true on success Returns: true on success
*/ */
template <typename Char>
static bool static bool
compileBracket(int options, int* brackets, unsigned char** codeptr, compileBracket(int options, int* brackets, unsigned char** codeptr,
const Char** ptrptr, const Char* patternEnd, ErrorCode* errorcodeptr, int skipbytes, const UChar** ptrptr, const UChar* patternEnd, ErrorCode* errorcodeptr, int skipbytes,
int* firstbyteptr, int* reqbyteptr, CompileData& cd) int* firstbyteptr, int* reqbyteptr, CompileData& cd)
{ {
const Char* ptr = *ptrptr; const UChar* ptr = *ptrptr;
unsigned char* code = *codeptr; unsigned char* code = *codeptr;
unsigned char* last_branch = code; unsigned char* last_branch = code;
unsigned char* start_bracket = code; unsigned char* start_bracket = code;
int firstbyte = REQ_UNSET; int firstbyte = REQ_UNSET;
int reqbyte = REQ_UNSET; int reqbyte = REQ_UNSET;
/* Offset is set zero to mark that this bracket is still open */ /* Offset is set zero to mark that this bracket is still open */
putLinkValueAllowZero(code + 1, 0); putLinkValueAllowZero(code + 1, 0);
code += 1 + LINK_SIZE + skipbytes; code += 1 + LINK_SIZE + skipbytes;
/* Loop for each alternative branch */ /* Loop for each alternative branch */
while (true) { while (true) {
/* Now compile the branch */ /* Now compile the branch */
int branchfirstbyte; int branchfirstbyte;
int branchreqbyte; int branchreqbyte;
if (!compileBranch(options, brackets, &code, &ptr, patternEnd, errorcodeptr, if (!compileBranch(options, brackets, &code, &ptr, patternEnd, errorcodeptr,
...@@ -1739,45 +1732,45 @@ compileBracket(int options, int* brackets, unsigned char** codeptr, ...@@ -1739,45 +1732,45 @@ compileBracket(int options, int* brackets, unsigned char** codeptr,
*ptrptr = ptr; *ptrptr = ptr;
return false; return false;
} }
/* If this is the first branch, the firstbyte and reqbyte values for the /* If this is the first branch, the firstbyte and reqbyte values for the
branch become the values for the regex. */ branch become the values for the regex. */
if (*last_branch != OP_ALT) { if (*last_branch != OP_ALT) {
firstbyte = branchfirstbyte; firstbyte = branchfirstbyte;
reqbyte = branchreqbyte; reqbyte = branchreqbyte;
} }
/* If this is not the first branch, the first char and reqbyte have to /* If this is not the first branch, the first char and reqbyte have to
match the values from all the previous branches, except that if the previous match the values from all the previous branches, except that if the previous
value for reqbyte didn't have REQ_VARY set, it can still match, and we set value for reqbyte didn't have REQ_VARY set, it can still match, and we set
REQ_VARY for the regex. */ REQ_VARY for the regex. */
else { else {
/* If we previously had a firstbyte, but it doesn't match the new branch, /* If we previously had a firstbyte, but it doesn't match the new branch,
we have to abandon the firstbyte for the regex, but if there was previously we have to abandon the firstbyte for the regex, but if there was previously
no reqbyte, it takes on the value of the old firstbyte. */ no reqbyte, it takes on the value of the old firstbyte. */
if (firstbyte >= 0 && firstbyte != branchfirstbyte) { if (firstbyte >= 0 && firstbyte != branchfirstbyte) {
if (reqbyte < 0) if (reqbyte < 0)
reqbyte = firstbyte; reqbyte = firstbyte;
firstbyte = REQ_NONE; firstbyte = REQ_NONE;
} }
/* If we (now or from before) have no firstbyte, a firstbyte from the /* If we (now or from before) have no firstbyte, a firstbyte from the
branch becomes a reqbyte if there isn't a branch reqbyte. */ branch becomes a reqbyte if there isn't a branch reqbyte. */
if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0) if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
branchreqbyte = branchfirstbyte; branchreqbyte = branchfirstbyte;
/* Now ensure that the reqbytes match */ /* Now ensure that the reqbytes match */
if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY)) if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
reqbyte = REQ_NONE; reqbyte = REQ_NONE;
else else
reqbyte |= branchreqbyte; /* To "or" REQ_VARY */ reqbyte |= branchreqbyte; /* To "or" REQ_VARY */
} }
/* Reached end of expression, either ')' or end of pattern. Go back through /* Reached end of expression, either ')' or end of pattern. Go back through
the alternative branches and reverse the chain of offsets, with the field in the alternative branches and reverse the chain of offsets, with the field in
the BRA item now becoming an offset to the first alternative. If there are the BRA item now becoming an offset to the first alternative. If there are
...@@ -1786,7 +1779,7 @@ compileBracket(int options, int* brackets, unsigned char** codeptr, ...@@ -1786,7 +1779,7 @@ compileBracket(int options, int* brackets, unsigned char** codeptr,
the ims options were changed inside the group, compile a resetting op-code the ims options were changed inside the group, compile a resetting op-code
following, except at the very end of the pattern. Return leaving the pointer following, except at the very end of the pattern. Return leaving the pointer
at the terminating char. */ at the terminating char. */
if (ptr >= patternEnd || *ptr != '|') { if (ptr >= patternEnd || *ptr != '|') {
int length = code - last_branch; int length = code - last_branch;
do { do {
...@@ -1795,27 +1788,27 @@ compileBracket(int options, int* brackets, unsigned char** codeptr, ...@@ -1795,27 +1788,27 @@ compileBracket(int options, int* brackets, unsigned char** codeptr,
length = prev_length; length = prev_length;
last_branch -= length; last_branch -= length;
} while (length > 0); } while (length > 0);
/* Fill in the ket */ /* Fill in the ket */
*code = OP_KET; *code = OP_KET;
putLinkValue(code + 1, code - start_bracket); putLinkValue(code + 1, code - start_bracket);
code += 1 + LINK_SIZE; code += 1 + LINK_SIZE;
/* Set values to pass back */ /* Set values to pass back */
*codeptr = code; *codeptr = code;
*ptrptr = ptr; *ptrptr = ptr;
*firstbyteptr = firstbyte; *firstbyteptr = firstbyte;
*reqbyteptr = reqbyte; *reqbyteptr = reqbyte;
return true; return true;
} }
/* Another branch follows; insert an "or" node. Its length field points back /* Another branch follows; insert an "or" node. Its length field points back
to the previous branch while the bracket remains open. At the end the chain to the previous branch while the bracket remains open. At the end the chain
is reversed. It's done like this so that the start of the bracket has a is reversed. It's done like this so that the start of the bracket has a
zero offset until it is closed, making it possible to detect recursion. */ zero offset until it is closed, making it possible to detect recursion. */
*code = OP_ALT; *code = OP_ALT;
putLinkValue(code + 1, code - last_branch); putLinkValue(code + 1, code - last_branch);
last_branch = code; last_branch = code;
...@@ -1851,7 +1844,7 @@ static bool branchIsAnchored(const unsigned char* code) ...@@ -1851,7 +1844,7 @@ static bool branchIsAnchored(const unsigned char* code)
if (op >= OP_BRA || op == OP_ASSERT) if (op >= OP_BRA || op == OP_ASSERT)
return bracketIsAnchored(scode); return bracketIsAnchored(scode);
/* Check for explicit anchoring */ /* Check for explicit anchoring */
return op == OP_CIRC; return op == OP_CIRC;
} }
...@@ -1891,7 +1884,7 @@ static bool branchNeedsLineStart(const unsigned char* code, unsigned captureMap, ...@@ -1891,7 +1884,7 @@ static bool branchNeedsLineStart(const unsigned char* code, unsigned captureMap,
{ {
const unsigned char* scode = firstSignificantOpcode(code); const unsigned char* scode = firstSignificantOpcode(code);
int op = *scode; int op = *scode;
/* Capturing brackets */ /* Capturing brackets */
if (op > OP_BRA) { if (op > OP_BRA) {
int captureNum = op - OP_BRA; int captureNum = op - OP_BRA;
...@@ -1900,14 +1893,14 @@ static bool branchNeedsLineStart(const unsigned char* code, unsigned captureMap, ...@@ -1900,14 +1893,14 @@ static bool branchNeedsLineStart(const unsigned char* code, unsigned captureMap,
int bracketMask = (captureNum < 32) ? (1 << captureNum) : 1; int bracketMask = (captureNum < 32) ? (1 << captureNum) : 1;
return bracketNeedsLineStart(scode, captureMap | bracketMask, backrefMap); return bracketNeedsLineStart(scode, captureMap | bracketMask, backrefMap);
} }
/* Other brackets */ /* Other brackets */
if (op == OP_BRA || op == OP_ASSERT) if (op == OP_BRA || op == OP_ASSERT)
return bracketNeedsLineStart(scode, captureMap, backrefMap); return bracketNeedsLineStart(scode, captureMap, backrefMap);
/* .* means "start at start or after \n" if it isn't in brackets that /* .* means "start at start or after \n" if it isn't in brackets that
may be referenced. */ may be referenced. */
if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
return scode[1] == OP_NOT_NEWLINE && !(captureMap & backrefMap); return scode[1] == OP_NOT_NEWLINE && !(captureMap & backrefMap);
...@@ -1949,14 +1942,14 @@ static int branchFindFirstAssertedCharacter(const unsigned char* code, bool inas ...@@ -1949,14 +1942,14 @@ static int branchFindFirstAssertedCharacter(const unsigned char* code, bool inas
{ {
const unsigned char* scode = firstSignificantOpcodeSkippingAssertions(code); const unsigned char* scode = firstSignificantOpcodeSkippingAssertions(code);
int op = *scode; int op = *scode;
if (op >= OP_BRA) if (op >= OP_BRA)
op = OP_BRA; op = OP_BRA;
switch (op) { switch (op) {
default: default:
return -1; return -1;
case OP_BRA: case OP_BRA:
case OP_ASSERT: case OP_ASSERT:
return bracketFindFirstAssertedCharacter(scode, op == OP_ASSERT); return bracketFindFirstAssertedCharacter(scode, op == OP_ASSERT);
...@@ -2002,8 +1995,7 @@ static inline int multiplyWithOverflowCheck(int a, int b) ...@@ -2002,8 +1995,7 @@ static inline int multiplyWithOverflowCheck(int a, int b)
return a * b; return a * b;
} }
template <typename Char> static int calculateCompiledPatternLength(const UChar* pattern, int patternLength, JSRegExpIgnoreCaseOption ignoreCase,
static int calculateCompiledPatternLength(const Char* pattern, int patternLength, JSRegExpIgnoreCaseOption ignoreCase,
CompileData& cd, ErrorCode& errorcode) CompileData& cd, ErrorCode& errorcode)
{ {
/* Make a pass over the pattern to compute the /* Make a pass over the pattern to compute the
...@@ -2022,10 +2014,10 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2022,10 +2014,10 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
int brastack[BRASTACK_SIZE]; int brastack[BRASTACK_SIZE];
unsigned char bralenstack[BRASTACK_SIZE]; unsigned char bralenstack[BRASTACK_SIZE];
int bracount = 0; int bracount = 0;
const Char* ptr = (const Char*)(pattern - 1); const UChar* ptr = (const UChar*)(pattern - 1);
const Char* patternEnd = (const Char*)(pattern + patternLength); const UChar* patternEnd = (const UChar*)(pattern + patternLength);
while (++ptr < patternEnd) { while (++ptr < patternEnd) {
int minRepeats = 0, maxRepeats = 0; int minRepeats = 0, maxRepeats = 0;
int c = *ptr; int c = *ptr;
...@@ -2038,12 +2030,12 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2038,12 +2030,12 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
c = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBrackets, false); c = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBrackets, false);
if (errorcode != 0) if (errorcode != 0)
return -1; return -1;
lastitemlength = 1; /* Default length of last item for repeats */ lastitemlength = 1; /* Default length of last item for repeats */
if (c >= 0) { /* Data character */ if (c >= 0) { /* Data character */
length += 2; /* For a one-byte character */ length += 2; /* For a one-byte character */
if (c > 127) { if (c > 127) {
int i; int i;
for (i = 0; i < kjs_pcre_utf8_table1_size; i++) for (i = 0; i < kjs_pcre_utf8_table1_size; i++)
...@@ -2051,18 +2043,18 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2051,18 +2043,18 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
length += i; length += i;
lastitemlength += i; lastitemlength += i;
} }
continue; continue;
} }
/* Other escapes need one byte */ /* Other escapes need one byte */
length++; length++;
/* A back reference needs an additional 2 bytes, plus either one or 5 /* A back reference needs an additional 2 bytes, plus either one or 5
bytes for a repeat. We also need to keep the value of the highest bytes for a repeat. We also need to keep the value of the highest
back reference. */ back reference. */
if (c <= -ESC_REF) { if (c <= -ESC_REF) {
int refnum = -c - ESC_REF; int refnum = -c - ESC_REF;
cd.backrefMap |= (refnum < 32) ? (1 << refnum) : 1; cd.backrefMap |= (refnum < 32) ? (1 << refnum) : 1;
...@@ -2083,20 +2075,20 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2083,20 +2075,20 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
} }
} }
continue; continue;
case '^': /* Single-byte metacharacters */ case '^': /* Single-byte metacharacters */
case '.': case '.':
case '$': case '$':
length++; length++;
lastitemlength = 1; lastitemlength = 1;
continue; continue;
case '*': /* These repeats won't be after brackets; */ case '*': /* These repeats won't be after brackets; */
case '+': /* those are handled separately */ case '+': /* those are handled separately */
case '?': case '?':
length++; length++;
goto POSSESSIVE; goto POSSESSIVE;
/* This covers the cases of braced repeats after a single char, metachar, /* This covers the cases of braced repeats after a single char, metachar,
class, or back reference. */ class, or back reference. */
...@@ -2106,15 +2098,15 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2106,15 +2098,15 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
ptr = readRepeatCounts(ptr + 1, &minRepeats, &maxRepeats, &errorcode); ptr = readRepeatCounts(ptr + 1, &minRepeats, &maxRepeats, &errorcode);
if (errorcode != 0) if (errorcode != 0)
return -1; return -1;
/* These special cases just insert one extra opcode */ /* These special cases just insert one extra opcode */
if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) || if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) ||
(minRepeats == 1 && maxRepeats == -1)) (minRepeats == 1 && maxRepeats == -1))
length++; length++;
/* These cases might insert additional copies of a preceding character. */ /* These cases might insert additional copies of a preceding character. */
else { else {
if (minRepeats != 1) { if (minRepeats != 1) {
length -= lastitemlength; /* Uncount the original char or metachar */ length -= lastitemlength; /* Uncount the original char or metachar */
...@@ -2123,7 +2115,7 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2123,7 +2115,7 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
} }
length += lastitemlength + ((maxRepeats > 0) ? 3 : 1); length += lastitemlength + ((maxRepeats > 0) ? 3 : 1);
} }
if (safelyCheckNextChar(ptr, patternEnd, '?')) if (safelyCheckNextChar(ptr, patternEnd, '?'))
ptr++; /* Needs no extra length */ ptr++; /* Needs no extra length */
...@@ -2133,18 +2125,18 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2133,18 +2125,18 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
length += 2 + 2 * LINK_SIZE; /* Allow for atomic brackets */ length += 2 + 2 * LINK_SIZE; /* Allow for atomic brackets */
} }
continue; continue;
/* An alternation contains an offset to the next branch or ket. If any ims /* An alternation contains an offset to the next branch or ket. If any ims
options changed in the previous branch(es), and/or if we are in a options changed in the previous branch(es), and/or if we are in a
lookbehind assertion, extra space will be needed at the start of the lookbehind assertion, extra space will be needed at the start of the
branch. This is handled by branch_extra. */ branch. This is handled by branch_extra. */
case '|': case '|':
if (brastackptr == 0) if (brastackptr == 0)
cd.needOuterBracket = true; cd.needOuterBracket = true;
length += 1 + LINK_SIZE + branch_extra; length += 1 + LINK_SIZE + branch_extra;
continue; continue;
/* A character class uses 33 characters provided that all the character /* A character class uses 33 characters provided that all the character
values are less than 256. Otherwise, it uses a bit map for low valued values are less than 256. Otherwise, it uses a bit map for low valued
characters, and individual items for others. Don't worry about character characters, and individual items for others. Don't worry about character
...@@ -2152,7 +2144,7 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2152,7 +2144,7 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
compile. A character class that contains only one single-byte character compile. A character class that contains only one single-byte character
uses 2 or 3 bytes, depending on whether it is negated or not. Notice this uses 2 or 3 bytes, depending on whether it is negated or not. Notice this
where we can. (In UTF-8 mode we can do this only for chars < 128.) */ where we can. (In UTF-8 mode we can do this only for chars < 128.) */
case '[': { case '[': {
int class_optcount; int class_optcount;
if (*(++ptr) == '^') { if (*(++ptr) == '^') {
...@@ -2161,46 +2153,46 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2161,46 +2153,46 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
} }
else else
class_optcount = 0; class_optcount = 0;
bool class_utf8 = false; bool class_utf8 = false;
for (; ptr < patternEnd && *ptr != ']'; ++ptr) { for (; ptr < patternEnd && *ptr != ']'; ++ptr) {
/* Check for escapes */ /* Check for escapes */
if (*ptr == '\\') { if (*ptr == '\\') {
c = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBrackets, true); c = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBrackets, true);
if (errorcode != 0) if (errorcode != 0)
return -1; return -1;
/* Handle escapes that turn into characters */ /* Handle escapes that turn into characters */
if (c >= 0) if (c >= 0)
goto NON_SPECIAL_CHARACTER; goto NON_SPECIAL_CHARACTER;
/* Escapes that are meta-things. The normal ones just affect the /* Escapes that are meta-things. The normal ones just affect the
bit map, but Unicode properties require an XCLASS extended item. */ bit map, but Unicode properties require an XCLASS extended item. */
else else
class_optcount = 10; /* \d, \s etc; make sure > 1 */ class_optcount = 10; /* \d, \s etc; make sure > 1 */
} }
/* Anything else increments the possible optimization count. We have to /* Anything else increments the possible optimization count. We have to
detect ranges here so that we can compute the number of extra ranges for detect ranges here so that we can compute the number of extra ranges for
caseless wide characters when UCP support is available. If there are wide caseless wide characters when UCP support is available. If there are wide
characters, we are going to have to use an XCLASS, even for single characters, we are going to have to use an XCLASS, even for single
characters. */ characters. */
else { else {
c = *ptr; c = *ptr;
/* Come here from handling \ above when it escapes to a char value */ /* Come here from handling \ above when it escapes to a char value */
NON_SPECIAL_CHARACTER: NON_SPECIAL_CHARACTER:
class_optcount++; class_optcount++;
int d = -1; int d = -1;
if (safelyCheckNextChar(ptr, patternEnd, '-')) { if (safelyCheckNextChar(ptr, patternEnd, '-')) {
Char const *hyptr = ptr++; UChar const *hyptr = ptr++;
if (safelyCheckNextChar(ptr, patternEnd, '\\')) { if (safelyCheckNextChar(ptr, patternEnd, '\\')) {
ptr++; ptr++;
d = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBrackets, true); d = checkEscape(&ptr, patternEnd, &errorcode, cd.numCapturingBrackets, true);
...@@ -2212,17 +2204,17 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2212,17 +2204,17 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
if (d < 0) if (d < 0)
ptr = hyptr; /* go back to hyphen as data */ ptr = hyptr; /* go back to hyphen as data */
} }
/* If d >= 0 we have a range. In UTF-8 mode, if the end is > 255, or > /* If d >= 0 we have a range. In UTF-8 mode, if the end is > 255, or >
127 for caseless matching, we will need to use an XCLASS. */ 127 for caseless matching, we will need to use an XCLASS. */
if (d >= 0) { if (d >= 0) {
class_optcount = 10; /* Ensure > 1 */ class_optcount = 10; /* Ensure > 1 */
if (d < c) { if (d < c) {
errorcode = ERR8; errorcode = ERR8;
return -1; return -1;
} }
if ((d > 255 || (ignoreCase && d > 127))) { if ((d > 255 || (ignoreCase && d > 127))) {
unsigned char buffer[6]; unsigned char buffer[6];
if (!class_utf8) /* Allow for XCLASS overhead */ if (!class_utf8) /* Allow for XCLASS overhead */
...@@ -2230,13 +2222,13 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2230,13 +2222,13 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
class_utf8 = true; class_utf8 = true;
length += LINK_SIZE + 2; length += LINK_SIZE + 2;
} }
/* If we have UCP support, find out how many extra ranges are /* If we have UCP support, find out how many extra ranges are
needed to map the other case of characters within this range. We needed to map the other case of characters within this range. We
have to mimic the range optimization here, because extending the have to mimic the range optimization here, because extending the
range upwards might push d over a boundary that makes it use range upwards might push d over a boundary that makes it use
another byte in the UTF-8 representation. */ another byte in the UTF-8 representation. */
if (ignoreCase) { if (ignoreCase) {
int occ, ocd; int occ, ocd;
int cc = c; int cc = c;
...@@ -2244,7 +2236,7 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2244,7 +2236,7 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
while (getOthercaseRange(&cc, origd, &occ, &ocd)) { while (getOthercaseRange(&cc, origd, &occ, &ocd)) {
if (occ >= c && ocd <= d) if (occ >= c && ocd <= d)
continue; /* Skip embedded */ continue; /* Skip embedded */
if (occ < c && ocd >= c - 1) /* Extend the basic range */ if (occ < c && ocd >= c - 1) /* Extend the basic range */
{ /* if there is overlap, */ { /* if there is overlap, */
c = occ; /* noting that if occ < c */ c = occ; /* noting that if occ < c */
...@@ -2255,26 +2247,26 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2255,26 +2247,26 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
d = ocd; d = ocd;
continue; continue;
} }
/* An extra item is needed */ /* An extra item is needed */
length += 1 + encodeUTF8(occ, buffer) + length += 1 + encodeUTF8(occ, buffer) +
((occ == ocd) ? 0 : encodeUTF8(ocd, buffer)); ((occ == ocd) ? 0 : encodeUTF8(ocd, buffer));
} }
} }
/* The length of the (possibly extended) range */ /* The length of the (possibly extended) range */
length += 1 + encodeUTF8(c, buffer) + encodeUTF8(d, buffer); length += 1 + encodeUTF8(c, buffer) + encodeUTF8(d, buffer);
} }
} }
/* We have a single character. There is nothing to be done unless we /* We have a single character. There is nothing to be done unless we
are in UTF-8 mode. If the char is > 255, or 127 when caseless, we must are in UTF-8 mode. If the char is > 255, or 127 when caseless, we must
allow for an XCL_SINGLE item, doubled for caselessness if there is UCP allow for an XCL_SINGLE item, doubled for caselessness if there is UCP
support. */ support. */
else { else {
if ((c > 255 || (ignoreCase && c > 127))) { if ((c > 255 || (ignoreCase && c > 127))) {
unsigned char buffer[6]; unsigned char buffer[6];
...@@ -2289,12 +2281,12 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2289,12 +2281,12 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
} }
} }
} }
if (ptr >= patternEnd) { /* Missing terminating ']' */ if (ptr >= patternEnd) { /* Missing terminating ']' */
errorcode = ERR6; errorcode = ERR6;
return -1; return -1;
} }
/* We can optimize when there was only one optimizable character. /* We can optimize when there was only one optimizable character.
Note that this does not detect the case of a negated single character. Note that this does not detect the case of a negated single character.
In that case we do an incorrect length computation, but it's not a serious In that case we do an incorrect length computation, but it's not a serious
...@@ -2306,10 +2298,10 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2306,10 +2298,10 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
/* Here, we handle repeats for the class opcodes. */ /* Here, we handle repeats for the class opcodes. */
{ {
length += 33; length += 33;
/* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier, /* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier,
we also need extra for wrapping the whole thing in a sub-pattern. */ we also need extra for wrapping the whole thing in a sub-pattern. */
if (safelyCheckNextChar(ptr, patternEnd, '{') && isCountedRepeat(ptr + 2, patternEnd)) { if (safelyCheckNextChar(ptr, patternEnd, '{') && isCountedRepeat(ptr + 2, patternEnd)) {
ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats, &errorcode); ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats, &errorcode);
if (errorcode != 0) if (errorcode != 0)
...@@ -2330,62 +2322,62 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2330,62 +2322,62 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
} }
/* Brackets may be genuine groups or special things */ /* Brackets may be genuine groups or special things */
case '(': { case '(': {
int branch_newextra = 0; int branch_newextra = 0;
int bracket_length = 1 + LINK_SIZE; int bracket_length = 1 + LINK_SIZE;
bool capturing = false; bool capturing = false;
/* Handle special forms of bracket, which all start (? */ /* Handle special forms of bracket, which all start (? */
if (safelyCheckNextChar(ptr, patternEnd, '?')) { if (safelyCheckNextChar(ptr, patternEnd, '?')) {
switch (c = (ptr + 2 < patternEnd ? ptr[2] : 0)) { switch (c = (ptr + 2 < patternEnd ? ptr[2] : 0)) {
/* Non-referencing groups and lookaheads just move the pointer on, and /* Non-referencing groups and lookaheads just move the pointer on, and
then behave like a non-special bracket, except that they don't increment then behave like a non-special bracket, except that they don't increment
the count of extracting brackets. Ditto for the "once only" bracket, the count of extracting brackets. Ditto for the "once only" bracket,
which is in Perl from version 5.005. */ which is in Perl from version 5.005. */
case ':': case ':':
case '=': case '=':
case '!': case '!':
ptr += 2; ptr += 2;
break; break;
/* Else loop checking valid options until ) is met. Anything else is an /* Else loop checking valid options until ) is met. Anything else is an
error. If we are without any brackets, i.e. at top level, the settings error. If we are without any brackets, i.e. at top level, the settings
act as if specified in the options, so massage the options immediately. act as if specified in the options, so massage the options immediately.
This is for backward compatibility with Perl 5.004. */ This is for backward compatibility with Perl 5.004. */
default: default:
errorcode = ERR12; errorcode = ERR12;
return -1; return -1;
} }
} else } else
capturing = 1; capturing = 1;
/* Capturing brackets must be counted so we can process escapes in a /* Capturing brackets must be counted so we can process escapes in a
Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to need Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to need
an additional 3 bytes of memory per capturing bracket. */ an additional 3 bytes of memory per capturing bracket. */
if (capturing) { if (capturing) {
bracount++; bracount++;
if (bracount > EXTRACT_BASIC_MAX) if (bracount > EXTRACT_BASIC_MAX)
bracket_length += 3; bracket_length += 3;
} }
/* Save length for computing whole length at end if there's a repeat that /* Save length for computing whole length at end if there's a repeat that
requires duplication of the group. Also save the current value of requires duplication of the group. Also save the current value of
branch_extra, and start the new group with the new value. If non-zero, this branch_extra, and start the new group with the new value. If non-zero, this
will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */ will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */
if (brastackptr >= sizeof(brastack)/sizeof(int)) { if (brastackptr >= sizeof(brastack)/sizeof(int)) {
errorcode = ERR17; errorcode = ERR17;
return -1; return -1;
} }
bralenstack[brastackptr] = branch_extra; bralenstack[brastackptr] = branch_extra;
branch_extra = branch_newextra; branch_extra = branch_newextra;
brastack[brastackptr++] = length; brastack[brastackptr++] = length;
length += bracket_length; length += bracket_length;
continue; continue;
...@@ -2406,10 +2398,10 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2406,10 +2398,10 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
} }
else else
duplength = 0; duplength = 0;
/* Leave ptr at the final char; for readRepeatCounts this happens /* Leave ptr at the final char; for readRepeatCounts this happens
automatically; for the others we need an increment. */ automatically; for the others we need an increment. */
if ((ptr + 1 < patternEnd) && (c = ptr[1]) == '{' && isCountedRepeat(ptr + 2, patternEnd)) { if ((ptr + 1 < patternEnd) && (c = ptr[1]) == '{' && isCountedRepeat(ptr + 2, patternEnd)) {
ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats, &errorcode); ptr = readRepeatCounts(ptr + 2, &minRepeats, &maxRepeats, &errorcode);
if (errorcode) if (errorcode)
...@@ -2430,12 +2422,12 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2430,12 +2422,12 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
minRepeats = 1; minRepeats = 1;
maxRepeats = 1; maxRepeats = 1;
} }
/* If the minimum is zero, we have to allow for an OP_BRAZERO before the /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
group, and if the maximum is greater than zero, we have to replicate group, and if the maximum is greater than zero, we have to replicate
maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
bracket set. */ bracket set. */
int repeatsLength; int repeatsLength;
if (minRepeats == 0) { if (minRepeats == 0) {
length++; length++;
...@@ -2452,13 +2444,13 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2452,13 +2444,13 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
} }
} }
} }
/* When the minimum is greater than zero, we have to replicate up to /* When the minimum is greater than zero, we have to replicate up to
minval-1 times, with no additions required in the copies. Then, if there minval-1 times, with no additions required in the copies. Then, if there
is a limited maximum we have to replicate up to maxval-1 times allowing is a limited maximum we have to replicate up to maxval-1 times allowing
for a BRAZERO item before each optional copy and nesting brackets for all for a BRAZERO item before each optional copy and nesting brackets for all
but one of the optional copies. */ but one of the optional copies. */
else { else {
repeatsLength = multiplyWithOverflowCheck(minRepeats - 1, duplength); repeatsLength = multiplyWithOverflowCheck(minRepeats - 1, duplength);
if (repeatsLength < 0) { if (repeatsLength < 0) {
...@@ -2479,9 +2471,9 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2479,9 +2471,9 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
return -1; return -1;
} }
} }
/* Allow space for once brackets for "possessive quantifier" */ /* Allow space for once brackets for "possessive quantifier" */
if (safelyCheckNextChar(ptr, patternEnd, '+')) { if (safelyCheckNextChar(ptr, patternEnd, '+')) {
ptr++; ptr++;
length += 2 + 2 * LINK_SIZE; length += 2 + 2 * LINK_SIZE;
...@@ -2492,7 +2484,7 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2492,7 +2484,7 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
/* Non-special character. It won't be space or # in extended mode, so it is /* Non-special character. It won't be space or # in extended mode, so it is
always a genuine character. If we are in a \Q...\E sequence, check for the always a genuine character. If we are in a \Q...\E sequence, check for the
end; if not, we have a literal. */ end; if not, we have a literal. */
default: default:
NORMAL_CHAR: NORMAL_CHAR:
length += 2; /* For a one-byte character */ length += 2; /* For a one-byte character */
...@@ -2506,11 +2498,11 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength ...@@ -2506,11 +2498,11 @@ static int calculateCompiledPatternLength(const Char* pattern, int patternLength
length += i; length += i;
lastitemlength += i; lastitemlength += i;
} }
continue; continue;
} }
} }
length += 2 + LINK_SIZE; /* For final KET and END */ length += 2 + LINK_SIZE; /* For final KET and END */
cd.numCapturingBrackets = bracount; cd.numCapturingBrackets = bracount;
...@@ -2545,8 +2537,7 @@ static inline JSRegExp* returnError(ErrorCode errorcode, const char** errorptr) ...@@ -2545,8 +2537,7 @@ static inline JSRegExp* returnError(ErrorCode errorcode, const char** errorptr)
return 0; return 0;
} }
template <typename Char> JSRegExp* jsRegExpCompile(const UChar* pattern, int patternLength,
JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength,
JSRegExpIgnoreCaseOption ignoreCase, JSRegExpMultilineOption multiline, JSRegExpIgnoreCaseOption ignoreCase, JSRegExpMultilineOption multiline,
unsigned* numSubpatterns, const char** errorptr, unsigned* numSubpatterns, const char** errorptr,
malloc_t* allocate_function, free_t* free_function) malloc_t* allocate_function, free_t* free_function)
...@@ -2556,9 +2547,9 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength, ...@@ -2556,9 +2547,9 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength,
if (!errorptr) if (!errorptr)
return 0; return 0;
*errorptr = NULL; *errorptr = NULL;
CompileData cd; CompileData cd;
ErrorCode errorcode = ERR0; ErrorCode errorcode = ERR0;
/* Call this once just to count the brackets. */ /* Call this once just to count the brackets. */
calculateCompiledPatternLength(pattern, patternLength, ignoreCase, cd, errorcode); calculateCompiledPatternLength(pattern, patternLength, ignoreCase, cd, errorcode);
...@@ -2566,29 +2557,29 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength, ...@@ -2566,29 +2557,29 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength,
int length = calculateCompiledPatternLength(pattern, patternLength, ignoreCase, cd, errorcode); int length = calculateCompiledPatternLength(pattern, patternLength, ignoreCase, cd, errorcode);
if (errorcode) if (errorcode)
return returnError(errorcode, errorptr); return returnError(errorcode, errorptr);
if (length > MAX_PATTERN_SIZE) if (length > MAX_PATTERN_SIZE)
return returnError(ERR16, errorptr); return returnError(ERR16, errorptr);
size_t size = length + sizeof(JSRegExp); size_t size = length + sizeof(JSRegExp);
JSRegExp* re = reinterpret_cast<JSRegExp*>((*allocate_function)(size)); JSRegExp* re = reinterpret_cast<JSRegExp*>((*allocate_function)(size));
if (!re) if (!re)
return returnError(ERR13, errorptr); return returnError(ERR13, errorptr);
re->options = (ignoreCase ? IgnoreCaseOption : 0) | (multiline ? MatchAcrossMultipleLinesOption : 0); re->options = (ignoreCase ? IgnoreCaseOption : 0) | (multiline ? MatchAcrossMultipleLinesOption : 0);
/* The starting points of the name/number translation table and of the code are /* The starting points of the name/number translation table and of the code are
passed around in the compile data block. */ passed around in the compile data block. */
const unsigned char* codeStart = (const unsigned char*)(re + 1); const unsigned char* codeStart = (const unsigned char*)(re + 1);
/* Set up a starting, non-extracting bracket, then compile the expression. On /* Set up a starting, non-extracting bracket, then compile the expression. On
error, errorcode will be set non-zero, so we don't need to look at the result error, errorcode will be set non-zero, so we don't need to look at the result
of the function here. */ of the function here. */
const Char* ptr = (const Char*)pattern; const UChar* ptr = (const UChar*)pattern;
const Char* patternEnd = pattern + patternLength; const UChar* patternEnd = pattern + patternLength;
unsigned char* code = (unsigned char*)codeStart; unsigned char* code = (unsigned char*)codeStart;
int firstbyte, reqbyte; int firstbyte, reqbyte;
int bracketCount = 0; int bracketCount = 0;
...@@ -2600,44 +2591,44 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength, ...@@ -2600,44 +2591,44 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength,
} }
re->top_bracket = bracketCount; re->top_bracket = bracketCount;
re->top_backref = cd.top_backref; re->top_backref = cd.top_backref;
/* If not reached end of pattern on success, there's an excess bracket. */ /* If not reached end of pattern on success, there's an excess bracket. */
if (errorcode == 0 && ptr < patternEnd) if (errorcode == 0 && ptr < patternEnd)
errorcode = ERR10; errorcode = ERR10;
/* Fill in the terminating state and check for disastrous overflow, but /* Fill in the terminating state and check for disastrous overflow, but
if debugging, leave the test till after things are printed out. */ if debugging, leave the test till after things are printed out. */
*code++ = OP_END; *code++ = OP_END;
ASSERT(code - codeStart <= length); ASSERT(code - codeStart <= length);
if (code - codeStart > length) if (code - codeStart > length)
errorcode = ERR7; errorcode = ERR7;
/* Give an error if there's back reference to a non-existent capturing /* Give an error if there's back reference to a non-existent capturing
subpattern. */ subpattern. */
if (re->top_backref > re->top_bracket) if (re->top_backref > re->top_bracket)
errorcode = ERR15; errorcode = ERR15;
/* Failed to compile, or error while post-processing */ /* Failed to compile, or error while post-processing */
if (errorcode != ERR0) { if (errorcode != ERR0) {
(*free_function)(reinterpret_cast<void*>(re)); (*free_function)(reinterpret_cast<void*>(re));
return returnError(errorcode, errorptr); return returnError(errorcode, errorptr);
} }
/* If the anchored option was not passed, set the flag if we can determine that /* If the anchored option was not passed, set the flag if we can determine that
the pattern is anchored by virtue of ^ characters or \A or anything else (such the pattern is anchored by virtue of ^ characters or \A or anything else (such
as starting with .* when DOTALL is set). as starting with .* when DOTALL is set).
Otherwise, if we know what the first character has to be, save it, because that Otherwise, if we know what the first character has to be, save it, because that
speeds up unanchored matches no end. If not, see if we can set the speeds up unanchored matches no end. If not, see if we can set the
UseMultiLineFirstByteOptimizationOption flag. This is helpful for multiline matches when all branches UseMultiLineFirstByteOptimizationOption flag. This is helpful for multiline matches when all branches
start with ^. and also when all branches start with .* for non-DOTALL matches. start with ^. and also when all branches start with .* for non-DOTALL matches.
*/ */
if (cd.needOuterBracket ? bracketIsAnchored(codeStart) : branchIsAnchored(codeStart)) if (cd.needOuterBracket ? bracketIsAnchored(codeStart) : branchIsAnchored(codeStart))
re->options |= IsAnchoredOption; re->options |= IsAnchoredOption;
else { else {
...@@ -2658,11 +2649,11 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength, ...@@ -2658,11 +2649,11 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength,
re->options |= UseMultiLineFirstByteOptimizationOption; re->options |= UseMultiLineFirstByteOptimizationOption;
} }
} }
/* For an anchored pattern, we use the "required byte" only if it follows a /* For an anchored pattern, we use the "required byte" only if it follows a
variable length item in the regex. Remove the caseless flag for non-caseable variable length item in the regex. Remove the caseless flag for non-caseable
bytes. */ bytes. */
if (reqbyte >= 0 && (!(re->options & IsAnchoredOption) || (reqbyte & REQ_VARY))) { if (reqbyte >= 0 && (!(re->options & IsAnchoredOption) || (reqbyte & REQ_VARY))) {
int ch = reqbyte & 255; int ch = reqbyte & 255;
if (ch < 127) { if (ch < 127) {
...@@ -2670,32 +2661,12 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength, ...@@ -2670,32 +2661,12 @@ JSRegExp* jsRegExpCompile(const Char* pattern, int patternLength,
re->options |= UseRequiredByteOptimizationOption; re->options |= UseRequiredByteOptimizationOption;
} }
} }
if (numSubpatterns) if (numSubpatterns)
*numSubpatterns = re->top_bracket; *numSubpatterns = re->top_bracket;
return re; return re;
} }
template
JSRegExp* jsRegExpCompile<unsigned short>(const unsigned short* pattern,
int patternLength,
JSRegExpIgnoreCaseOption ignoreCase,
JSRegExpMultilineOption multiline,
unsigned* numSubpatterns,
const char** errorptr,
malloc_t* allocate_function,
free_t* free_function);
template
JSRegExp* jsRegExpCompile<char>(const char* pattern,
int patternLength,
JSRegExpIgnoreCaseOption ignoreCase,
JSRegExpMultilineOption multiline,
unsigned* numSubpatterns,
const char** errorptr,
malloc_t* allocate_function,
free_t* free_function);
void jsRegExpFree(JSRegExp* re, free_t* free_function) void jsRegExpFree(JSRegExp* re, free_t* free_function)
{ {
(*free_function)(reinterpret_cast<void*>(re)); (*free_function)(reinterpret_cast<void*>(re));
......
...@@ -69,38 +69,36 @@ typedef void* ReturnLocation; ...@@ -69,38 +69,36 @@ typedef void* ReturnLocation;
/* Structure for building a chain of data for holding the values of /* Structure for building a chain of data for holding the values of
the subject pointer at the start of each bracket, used to detect when the subject pointer at the start of each bracket, used to detect when
an empty string has been matched by a bracket to break infinite loops. */ an empty string has been matched by a bracket to break infinite loops. */
template <typename Char>
struct BracketChainNode { struct BracketChainNode {
BracketChainNode<Char>* previousBracket; BracketChainNode* previousBracket;
const Char* bracketStart; const UChar* bracketStart;
}; };
template <typename Char>
struct MatchFrame { struct MatchFrame {
ReturnLocation returnLocation; ReturnLocation returnLocation;
struct MatchFrame<Char>* previousFrame; struct MatchFrame* previousFrame;
/* Function arguments that may change */ /* Function arguments that may change */
struct { struct {
const Char* subjectPtr; const UChar* subjectPtr;
const unsigned char* instructionPtr; const unsigned char* instructionPtr;
int offsetTop; int offsetTop;
BracketChainNode<Char>* bracketChain; BracketChainNode* bracketChain;
} args; } args;
/* PCRE uses "fake" recursion built off of gotos, thus /* PCRE uses "fake" recursion built off of gotos, thus
stack-based local variables are not safe to use. Instead we have to stack-based local variables are not safe to use. Instead we have to
store local variables on the current MatchFrame. */ store local variables on the current MatchFrame. */
struct { struct {
const unsigned char* data; const unsigned char* data;
const unsigned char* startOfRepeatingBracket; const unsigned char* startOfRepeatingBracket;
const Char* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
const unsigned char* instructionPtrAtStartOfOnce; const unsigned char* instructionPtrAtStartOfOnce;
int repeatOthercase; int repeatOthercase;
int ctype; int ctype;
int fc; int fc;
int fi; int fi;
...@@ -111,23 +109,22 @@ struct MatchFrame { ...@@ -111,23 +109,22 @@ struct MatchFrame {
int saveOffset1; int saveOffset1;
int saveOffset2; int saveOffset2;
int saveOffset3; int saveOffset3;
BracketChainNode<Char> bracketChainNode; BracketChainNode bracketChainNode;
} locals; } locals;
}; };
/* Structure for passing "static" information around between the functions /* Structure for passing "static" information around between the functions
doing traditional NFA matching, so that they are thread-safe. */ doing traditional NFA matching, so that they are thread-safe. */
template <typename Char>
struct MatchData { struct MatchData {
int* offsetVector; /* Offset vector */ int* offsetVector; /* Offset vector */
int offsetEnd; /* One past the end */ int offsetEnd; /* One past the end */
int offsetMax; /* The maximum usable for return data */ int offsetMax; /* The maximum usable for return data */
bool offsetOverflow; /* Set if too many extractions */ bool offsetOverflow; /* Set if too many extractions */
const Char* startSubject; /* Start of the subject string */ const UChar* startSubject; /* Start of the subject string */
const Char* endSubject; /* End of the subject string */ const UChar* endSubject; /* End of the subject string */
const Char* endMatchPtr; /* Subject position at end match */ const UChar* endMatchPtr; /* Subject position at end match */
int endOffsetTop; /* Highwater mark at end of match */ int endOffsetTop; /* Highwater mark at end of match */
bool multiline; bool multiline;
bool ignoreCase; bool ignoreCase;
...@@ -158,8 +155,7 @@ Arguments: ...@@ -158,8 +155,7 @@ Arguments:
md pointer to matching data block, if isSubject is true md pointer to matching data block, if isSubject is true
*/ */
template <typename Char> static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
static void pchars(const Char* p, int length, bool isSubject, const MatchData& md)
{ {
if (isSubject && length > md.endSubject - p) if (isSubject && length > md.endSubject - p)
length = md.endSubject - p; length = md.endSubject - p;
...@@ -191,11 +187,10 @@ Arguments: ...@@ -191,11 +187,10 @@ Arguments:
Returns: true if matched Returns: true if matched
*/ */
template <typename Char> static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
static bool matchRef(int offset, const Char* subjectPtr, int length, const MatchData<Char>& md)
{ {
const Char* p = md.startSubject + md.offsetVector[offset]; const UChar* p = md.startSubject + md.offsetVector[offset];
#ifdef DEBUG #ifdef DEBUG
if (subjectPtr >= md.endSubject) if (subjectPtr >= md.endSubject)
printf("matching subject <null>"); printf("matching subject <null>");
...@@ -207,19 +202,19 @@ static bool matchRef(int offset, const Char* subjectPtr, int length, const Match ...@@ -207,19 +202,19 @@ static bool matchRef(int offset, const Char* subjectPtr, int length, const Match
pchars(p, length, false, md); pchars(p, length, false, md);
printf("\n"); printf("\n");
#endif #endif
/* Always fail if not enough characters left */ /* Always fail if not enough characters left */
if (length > md.endSubject - subjectPtr) if (length > md.endSubject - subjectPtr)
return false; return false;
/* Separate the caselesss case for speed */ /* Separate the caselesss case for speed */
if (md.ignoreCase) { if (md.ignoreCase) {
while (length-- > 0) { while (length-- > 0) {
Char c = *p++; UChar c = *p++;
int othercase = kjs_pcre_ucp_othercase(c); int othercase = kjs_pcre_ucp_othercase(c);
Char d = *subjectPtr++; UChar d = *subjectPtr++;
if (c != d && othercase != d) if (c != d && othercase != d)
return false; return false;
} }
...@@ -229,7 +224,7 @@ static bool matchRef(int offset, const Char* subjectPtr, int length, const Match ...@@ -229,7 +224,7 @@ static bool matchRef(int offset, const Char* subjectPtr, int length, const Match
if (*p++ != *subjectPtr++) if (*p++ != *subjectPtr++)
return false; return false;
} }
return true; return true;
} }
...@@ -301,7 +296,6 @@ Returns: 1 if matched ) these values are >= 0 ...@@ -301,7 +296,6 @@ Returns: 1 if matched ) these values are >= 0
static const unsigned FRAMES_ON_STACK = 16; static const unsigned FRAMES_ON_STACK = 16;
template <typename Char>
struct MatchStack { struct MatchStack {
MatchStack() MatchStack()
: framesEnd(frames + FRAMES_ON_STACK) : framesEnd(frames + FRAMES_ON_STACK)
...@@ -310,27 +304,27 @@ struct MatchStack { ...@@ -310,27 +304,27 @@ struct MatchStack {
{ {
ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK); ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
} }
MatchFrame<Char> frames[FRAMES_ON_STACK]; MatchFrame frames[FRAMES_ON_STACK];
MatchFrame<Char>* framesEnd; MatchFrame* framesEnd;
MatchFrame<Char>* currentFrame; MatchFrame* currentFrame;
unsigned size; unsigned size;
inline bool canUseStackBufferForNextFrame() inline bool canUseStackBufferForNextFrame()
{ {
return size < FRAMES_ON_STACK; return size < FRAMES_ON_STACK;
} }
inline MatchFrame<Char>* allocateNextFrame() inline MatchFrame* allocateNextFrame()
{ {
if (canUseStackBufferForNextFrame()) if (canUseStackBufferForNextFrame())
return currentFrame + 1; return currentFrame + 1;
return new MatchFrame<Char>; return new MatchFrame;
} }
inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode<Char>* bracketChain, ReturnLocation returnLocation) inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation)
{ {
MatchFrame<Char>* newframe = allocateNextFrame(); MatchFrame* newframe = allocateNextFrame();
newframe->previousFrame = currentFrame; newframe->previousFrame = currentFrame;
newframe->args.subjectPtr = currentFrame->args.subjectPtr; newframe->args.subjectPtr = currentFrame->args.subjectPtr;
...@@ -342,10 +336,10 @@ struct MatchStack { ...@@ -342,10 +336,10 @@ struct MatchStack {
currentFrame = newframe; currentFrame = newframe;
} }
inline void popCurrentFrame() inline void popCurrentFrame()
{ {
MatchFrame<Char>* oldFrame = currentFrame; MatchFrame* oldFrame = currentFrame;
currentFrame = currentFrame->previousFrame; currentFrame = currentFrame->previousFrame;
if (size > FRAMES_ON_STACK) if (size > FRAMES_ON_STACK)
delete oldFrame; delete oldFrame;
...@@ -359,8 +353,7 @@ struct MatchStack { ...@@ -359,8 +353,7 @@ struct MatchStack {
} }
}; };
template <typename Char> static int matchError(int errorCode, MatchStack& stack)
static int matchError(int errorCode, MatchStack<Char>& stack)
{ {
stack.popAllFrames(); stack.popAllFrames();
return errorCode; return errorCode;
...@@ -384,14 +377,13 @@ static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* su ...@@ -384,14 +377,13 @@ static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* su
} }
} }
template <typename Char> static inline void startNewGroup(MatchFrame* currentFrame)
static inline void startNewGroup(MatchFrame<Char>* currentFrame)
{ {
/* At the start of a bracketed group, add the current subject pointer to the /* At the start of a bracketed group, add the current subject pointer to the
stack of such pointers, to be re-instated at the end of the group when we hit stack of such pointers, to be re-instated at the end of the group when we hit
the closing ket. When match() is called in other circumstances, we don't add to the closing ket. When match() is called in other circumstances, we don't add to
this stack. */ this stack. */
currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.bracketChain; currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.bracketChain;
currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subjectPtr; currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subjectPtr;
currentFrame->args.bracketChain = &currentFrame->locals.bracketChainNode; currentFrame->args.bracketChain = &currentFrame->locals.bracketChainNode;
...@@ -412,15 +404,14 @@ static inline void repeatInformationFromInstructionOffset(short instructionOffse ...@@ -412,15 +404,14 @@ static inline void repeatInformationFromInstructionOffset(short instructionOffse
maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset]; maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
} }
template <typename Char> static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
static int match(const Char* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData<Char>& md)
{ {
bool isMatch = false; bool isMatch = false;
int min; int min;
bool minimize = false; /* Initialization not really needed, but some compilers think so. */ bool minimize = false; /* Initialization not really needed, but some compilers think so. */
unsigned matchCount = 0; unsigned matchCount = 0;
MatchStack<Char> stack; MatchStack stack;
/* The opcode jump table. */ /* The opcode jump table. */
#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
...@@ -428,13 +419,13 @@ static int match(const Char* subjectPtr, const unsigned char* instructionPtr, in ...@@ -428,13 +419,13 @@ static int match(const Char* subjectPtr, const unsigned char* instructionPtr, in
static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) }; static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
#undef EMIT_JUMP_TABLE_ENTRY #undef EMIT_JUMP_TABLE_ENTRY
#endif #endif
/* One-time setup of the opcode jump table. */ /* One-time setup of the opcode jump table. */
#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
for (int i = 255; !opcodeJumpTable[i]; i--) for (int i = 255; !opcodeJumpTable[i]; i--)
opcodeJumpTable[i] = &&CAPTURING_BRACKET; opcodeJumpTable[i] = &&CAPTURING_BRACKET;
#endif #endif
#ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
// Shark shows this as a hot line // Shark shows this as a hot line
// Using a static const here makes this line disappear, but makes later access hotter (not sure why) // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
...@@ -447,20 +438,20 @@ static int match(const Char* subjectPtr, const unsigned char* instructionPtr, in ...@@ -447,20 +438,20 @@ static int match(const Char* subjectPtr, const unsigned char* instructionPtr, in
stack.currentFrame->args.offsetTop = offsetTop; stack.currentFrame->args.offsetTop = offsetTop;
stack.currentFrame->args.bracketChain = 0; stack.currentFrame->args.bracketChain = 0;
startNewGroup(stack.currentFrame); startNewGroup(stack.currentFrame);
/* This is where control jumps back to in order to effect "recursion" */ /* This is where control jumps back to in order to effect "recursion" */
RECURSE: RECURSE:
if (++matchCount > matchLimit) if (++matchCount > matchLimit)
return matchError(JSRegExpErrorHitLimit, stack); return matchError(JSRegExpErrorHitLimit, stack);
/* Now start processing the operations. */ /* Now start processing the operations. */
#ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
while (true) while (true)
#endif #endif
{ {
#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
#define BEGIN_OPCODE(opcode) LABEL_OP_##opcode #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
#define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr] #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
...@@ -468,7 +459,7 @@ RECURSE: ...@@ -468,7 +459,7 @@ RECURSE:
#define BEGIN_OPCODE(opcode) case OP_##opcode #define BEGIN_OPCODE(opcode) case OP_##opcode
#define NEXT_OPCODE continue #define NEXT_OPCODE continue
#endif #endif
#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
NEXT_OPCODE; NEXT_OPCODE;
#else #else
...@@ -476,7 +467,7 @@ RECURSE: ...@@ -476,7 +467,7 @@ RECURSE:
#endif #endif
{ {
/* Non-capturing bracket: optimized */ /* Non-capturing bracket: optimized */
BEGIN_OPCODE(BRA): BEGIN_OPCODE(BRA):
NON_CAPTURING_BRACKET: NON_CAPTURING_BRACKET:
DPRINTF(("start bracket 0\n")); DPRINTF(("start bracket 0\n"));
...@@ -488,27 +479,27 @@ RECURSE: ...@@ -488,27 +479,27 @@ RECURSE:
} while (*stack.currentFrame->args.instructionPtr == OP_ALT); } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
DPRINTF(("bracket 0 failed\n")); DPRINTF(("bracket 0 failed\n"));
RRETURN; RRETURN;
/* Skip over large extraction number data if encountered. */ /* Skip over large extraction number data if encountered. */
BEGIN_OPCODE(BRANUMBER): BEGIN_OPCODE(BRANUMBER):
stack.currentFrame->args.instructionPtr += 3; stack.currentFrame->args.instructionPtr += 3;
NEXT_OPCODE; NEXT_OPCODE;
/* End of the pattern. */ /* End of the pattern. */
BEGIN_OPCODE(END): BEGIN_OPCODE(END):
md.endMatchPtr = stack.currentFrame->args.subjectPtr; /* Record where we ended */ md.endMatchPtr = stack.currentFrame->args.subjectPtr; /* Record where we ended */
md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and how many extracts were taken */ md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and how many extracts were taken */
isMatch = true; isMatch = true;
RRETURN; RRETURN;
/* Assertion brackets. Check the alternative branches in turn - the /* Assertion brackets. Check the alternative branches in turn - the
matching won't pass the KET for an assertion. If any one branch matches, matching won't pass the KET for an assertion. If any one branch matches,
the assertion is true. Lookbehind assertions have an OP_REVERSE item at the the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
start of each branch to move the current point backwards, so the code at start of each branch to move the current point backwards, so the code at
this level is identical to the lookahead case. */ this level is identical to the lookahead case. */
BEGIN_OPCODE(ASSERT): BEGIN_OPCODE(ASSERT):
do { do {
RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL); RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
...@@ -518,17 +509,17 @@ RECURSE: ...@@ -518,17 +509,17 @@ RECURSE:
} while (*stack.currentFrame->args.instructionPtr == OP_ALT); } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
if (*stack.currentFrame->args.instructionPtr == OP_KET) if (*stack.currentFrame->args.instructionPtr == OP_KET)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
/* Continue from after the assertion, updating the offsets high water /* Continue from after the assertion, updating the offsets high water
mark, since extracts may have been taken during the assertion. */ mark, since extracts may have been taken during the assertion. */
advanceToEndOfBracket(stack.currentFrame->args.instructionPtr); advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
stack.currentFrame->args.offsetTop = md.endOffsetTop; stack.currentFrame->args.offsetTop = md.endOffsetTop;
NEXT_OPCODE; NEXT_OPCODE;
/* Negative assertion: all branches must fail to match */ /* Negative assertion: all branches must fail to match */
BEGIN_OPCODE(ASSERT_NOT): BEGIN_OPCODE(ASSERT_NOT):
do { do {
RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL); RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
...@@ -536,23 +527,23 @@ RECURSE: ...@@ -536,23 +527,23 @@ RECURSE:
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
} while (*stack.currentFrame->args.instructionPtr == OP_ALT); } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
NEXT_OPCODE; NEXT_OPCODE;
/* An alternation is the end of a branch; scan along to find the end of the /* An alternation is the end of a branch; scan along to find the end of the
bracketed group and go to there. */ bracketed group and go to there. */
BEGIN_OPCODE(ALT): BEGIN_OPCODE(ALT):
advanceToEndOfBracket(stack.currentFrame->args.instructionPtr); advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
NEXT_OPCODE; NEXT_OPCODE;
/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
that it may occur zero times. It may repeat infinitely, or not at all - that it may occur zero times. It may repeat infinitely, or not at all -
i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
repeat limits are compiled as a number of copies, with the optional ones repeat limits are compiled as a number of copies, with the optional ones
preceded by BRAZERO or BRAMINZERO. */ preceded by BRAZERO or BRAMINZERO. */
BEGIN_OPCODE(BRAZERO): { BEGIN_OPCODE(BRAZERO): {
stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1; stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain);
...@@ -562,7 +553,7 @@ RECURSE: ...@@ -562,7 +553,7 @@ RECURSE:
stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE; stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
NEXT_OPCODE; NEXT_OPCODE;
} }
BEGIN_OPCODE(BRAMINZERO): { BEGIN_OPCODE(BRAMINZERO): {
stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1; stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket); advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
...@@ -572,12 +563,12 @@ RECURSE: ...@@ -572,12 +563,12 @@ RECURSE:
stack.currentFrame->args.instructionPtr++; stack.currentFrame->args.instructionPtr++;
NEXT_OPCODE; NEXT_OPCODE;
} }
/* End of a group, repeated or non-repeating. If we are at the end of /* End of a group, repeated or non-repeating. If we are at the end of
an assertion "group", stop matching and return 1, but record the an assertion "group", stop matching and return 1, but record the
current high water mark for use by positive assertions. Do this also current high water mark for use by positive assertions. Do this also
for the "once" (not-backup up) groups. */ for the "once" (not-backup up) groups. */
BEGIN_OPCODE(KET): BEGIN_OPCODE(KET):
BEGIN_OPCODE(KETRMIN): BEGIN_OPCODE(KETRMIN):
BEGIN_OPCODE(KETRMAX): BEGIN_OPCODE(KETRMAX):
...@@ -593,30 +584,30 @@ RECURSE: ...@@ -593,30 +584,30 @@ RECURSE:
isMatch = true; isMatch = true;
RRETURN; RRETURN;
} }
/* In all other cases except a conditional group we have to check the /* In all other cases except a conditional group we have to check the
group number back at the start and if necessary complete handling an group number back at the start and if necessary complete handling an
extraction by setting the offsets and bumping the high water mark. */ extraction by setting the offsets and bumping the high water mark. */
stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA; stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
/* For extended extraction brackets (large number), we have to fish out /* For extended extraction brackets (large number), we have to fish out
the number from a dummy opcode at the start. */ the number from a dummy opcode at the start. */
if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX) if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE); stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE);
stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1; stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
#ifdef DEBUG #ifdef DEBUG
printf("end bracket %d", stack.currentFrame->locals.number); printf("end bracket %d", stack.currentFrame->locals.number);
printf("\n"); printf("\n");
#endif #endif
/* Test for a numbered group. This includes groups called as a result /* Test for a numbered group. This includes groups called as a result
of recursion. Note that whole-pattern recursion is coded as a recurse of recursion. Note that whole-pattern recursion is coded as a recurse
into group 0, so it won't be picked up here. Instead, we catch it when into group 0, so it won't be picked up here. Instead, we catch it when
the OP_END is reached. */ the OP_END is reached. */
if (stack.currentFrame->locals.number > 0) { if (stack.currentFrame->locals.number > 0) {
if (stack.currentFrame->locals.offset >= md.offsetMax) if (stack.currentFrame->locals.offset >= md.offsetMax)
md.offsetOverflow = true; md.offsetOverflow = true;
...@@ -628,21 +619,21 @@ RECURSE: ...@@ -628,21 +619,21 @@ RECURSE:
stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2; stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
} }
} }
/* For a non-repeating ket, just continue at this level. This also /* For a non-repeating ket, just continue at this level. This also
happens for a repeating ket if no characters were matched in the group. happens for a repeating ket if no characters were matched in the group.
This is the forcible breaking of infinite loops as implemented in Perl This is the forcible breaking of infinite loops as implemented in Perl
5.005. If there is an options reset, it will get obeyed in the normal 5.005. If there is an options reset, it will get obeyed in the normal
course of events. */ course of events. */
if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) { if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
NEXT_OPCODE; NEXT_OPCODE;
} }
/* The repeating kets try the rest of the pattern or restart from the /* The repeating kets try the rest of the pattern or restart from the
preceding bracket, in the appropriate order. */ preceding bracket, in the appropriate order. */
if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) { if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
if (isMatch) if (isMatch)
...@@ -659,7 +650,7 @@ RECURSE: ...@@ -659,7 +650,7 @@ RECURSE:
RRETURN; RRETURN;
} }
RRETURN; RRETURN;
/* Start of subject. */ /* Start of subject. */
BEGIN_OPCODE(CIRC): BEGIN_OPCODE(CIRC):
...@@ -691,28 +682,28 @@ RECURSE: ...@@ -691,28 +682,28 @@ RECURSE:
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->args.instructionPtr++; stack.currentFrame->args.instructionPtr++;
NEXT_OPCODE; NEXT_OPCODE;
/* Word boundary assertions */ /* Word boundary assertions */
BEGIN_OPCODE(NOT_WORD_BOUNDARY): BEGIN_OPCODE(NOT_WORD_BOUNDARY):
BEGIN_OPCODE(WORD_BOUNDARY): { BEGIN_OPCODE(WORD_BOUNDARY): {
bool currentCharIsWordChar = false; bool currentCharIsWordChar = false;
bool previousCharIsWordChar = false; bool previousCharIsWordChar = false;
if (stack.currentFrame->args.subjectPtr > md.startSubject) if (stack.currentFrame->args.subjectPtr > md.startSubject)
previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]); previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
if (stack.currentFrame->args.subjectPtr < md.endSubject) if (stack.currentFrame->args.subjectPtr < md.endSubject)
currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr); currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
/* Now see if the situation is what we want */ /* Now see if the situation is what we want */
bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY); bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar) if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
NEXT_OPCODE; NEXT_OPCODE;
} }
/* Match a single character type; inline for speed */ /* Match a single character type; inline for speed */
BEGIN_OPCODE(NOT_NEWLINE): BEGIN_OPCODE(NOT_NEWLINE):
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
...@@ -752,7 +743,7 @@ RECURSE: ...@@ -752,7 +743,7 @@ RECURSE:
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->args.instructionPtr++; stack.currentFrame->args.instructionPtr++;
NEXT_OPCODE; NEXT_OPCODE;
BEGIN_OPCODE(NOT_WORDCHAR): BEGIN_OPCODE(NOT_WORDCHAR):
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
...@@ -760,7 +751,7 @@ RECURSE: ...@@ -760,7 +751,7 @@ RECURSE:
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->args.instructionPtr++; stack.currentFrame->args.instructionPtr++;
NEXT_OPCODE; NEXT_OPCODE;
BEGIN_OPCODE(WORDCHAR): BEGIN_OPCODE(WORDCHAR):
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
...@@ -768,7 +759,7 @@ RECURSE: ...@@ -768,7 +759,7 @@ RECURSE:
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->args.instructionPtr++; stack.currentFrame->args.instructionPtr++;
NEXT_OPCODE; NEXT_OPCODE;
/* Match a back reference, possibly repeatedly. Look past the end of the /* Match a back reference, possibly repeatedly. Look past the end of the
item to see if there is repeat information following. The code is similar item to see if there is repeat information following. The code is similar
to that for character classes, but repeated for efficiency. Then obey to that for character classes, but repeated for efficiency. Then obey
...@@ -776,23 +767,23 @@ RECURSE: ...@@ -776,23 +767,23 @@ RECURSE:
However, if the referenced string is the empty string, always treat However, if the referenced string is the empty string, always treat
it as matched, any number of times (otherwise there could be infinite it as matched, any number of times (otherwise there could be infinite
loops). */ loops). */
BEGIN_OPCODE(REF): BEGIN_OPCODE(REF):
stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1; /* Doubled ref number */ stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1; /* Doubled ref number */
stack.currentFrame->args.instructionPtr += 3; /* Advance past item */ stack.currentFrame->args.instructionPtr += 3; /* Advance past item */
/* If the reference is unset, set the length to be longer than the amount /* If the reference is unset, set the length to be longer than the amount
of subject left; this ensures that every attempt at a match fails. We of subject left; this ensures that every attempt at a match fails. We
can't just fail here, because of the possibility of quantifiers with zero can't just fail here, because of the possibility of quantifiers with zero
minima. */ minima. */
if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0) if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
stack.currentFrame->locals.length = 0; stack.currentFrame->locals.length = 0;
else else
stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset]; stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
/* Set up for repetition, or handle the non-repeated case */ /* Set up for repetition, or handle the non-repeated case */
switch (*stack.currentFrame->args.instructionPtr) { switch (*stack.currentFrame->args.instructionPtr) {
case OP_CRSTAR: case OP_CRSTAR:
case OP_CRMINSTAR: case OP_CRMINSTAR:
...@@ -802,7 +793,7 @@ RECURSE: ...@@ -802,7 +793,7 @@ RECURSE:
case OP_CRMINQUERY: case OP_CRMINQUERY:
repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max); repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
break; break;
case OP_CRRANGE: case OP_CRRANGE:
case OP_CRMINRANGE: case OP_CRMINRANGE:
minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE); minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
...@@ -812,36 +803,36 @@ RECURSE: ...@@ -812,36 +803,36 @@ RECURSE:
stack.currentFrame->locals.max = INT_MAX; stack.currentFrame->locals.max = INT_MAX;
stack.currentFrame->args.instructionPtr += 5; stack.currentFrame->args.instructionPtr += 5;
break; break;
default: /* No repeat follows */ default: /* No repeat follows */
if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md)) if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length; stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
NEXT_OPCODE; NEXT_OPCODE;
} }
/* If the length of the reference is zero, just continue with the /* If the length of the reference is zero, just continue with the
main loop. */ main loop. */
if (stack.currentFrame->locals.length == 0) if (stack.currentFrame->locals.length == 0)
NEXT_OPCODE; NEXT_OPCODE;
/* First, ensure the minimum number of matches are present. */ /* First, ensure the minimum number of matches are present. */
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md)) if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length; stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
} }
/* If min = max, continue at the same level without recursion. /* If min = max, continue at the same level without recursion.
They are not both allowed to be zero. */ They are not both allowed to be zero. */
if (min == stack.currentFrame->locals.max) if (min == stack.currentFrame->locals.max)
NEXT_OPCODE; NEXT_OPCODE;
/* If minimizing, keep trying and advancing the pointer */ /* If minimizing, keep trying and advancing the pointer */
if (minimize) { if (minimize) {
for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
...@@ -853,9 +844,9 @@ RECURSE: ...@@ -853,9 +844,9 @@ RECURSE:
} }
/* Control never reaches here */ /* Control never reaches here */
} }
/* If maximizing, find the longest string and work backwards */ /* If maximizing, find the longest string and work backwards */
else { else {
stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
...@@ -872,23 +863,23 @@ RECURSE: ...@@ -872,23 +863,23 @@ RECURSE:
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
} }
/* Control never reaches here */ /* Control never reaches here */
/* Match a bit-mapped character class, possibly repeatedly. This op code is /* Match a bit-mapped character class, possibly repeatedly. This op code is
used when all the characters in the class have values in the range 0-255, used when all the characters in the class have values in the range 0-255,
and either the matching is caseful, or the characters are in the range and either the matching is caseful, or the characters are in the range
0-127 when UTF-8 processing is enabled. The only difference between 0-127 when UTF-8 processing is enabled. The only difference between
OP_CLASS and OP_NCLASS occurs when a data character outside the range is OP_CLASS and OP_NCLASS occurs when a data character outside the range is
encountered. encountered.
First, look past the end of the item to see if there is repeat information First, look past the end of the item to see if there is repeat information
following. Then obey similar code to character type repeats - written out following. Then obey similar code to character type repeats - written out
again for speed. */ again for speed. */
BEGIN_OPCODE(NCLASS): BEGIN_OPCODE(NCLASS):
BEGIN_OPCODE(CLASS): BEGIN_OPCODE(CLASS):
stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1; /* Save for matching */ stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1; /* Save for matching */
stack.currentFrame->args.instructionPtr += 33; /* Advance past the item */ stack.currentFrame->args.instructionPtr += 33; /* Advance past the item */
switch (*stack.currentFrame->args.instructionPtr) { switch (*stack.currentFrame->args.instructionPtr) {
case OP_CRSTAR: case OP_CRSTAR:
case OP_CRMINSTAR: case OP_CRMINSTAR:
...@@ -898,7 +889,7 @@ RECURSE: ...@@ -898,7 +889,7 @@ RECURSE:
case OP_CRMINQUERY: case OP_CRMINQUERY:
repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max); repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
break; break;
case OP_CRRANGE: case OP_CRRANGE:
case OP_CRMINRANGE: case OP_CRMINRANGE:
minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE); minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
...@@ -908,14 +899,14 @@ RECURSE: ...@@ -908,14 +899,14 @@ RECURSE:
stack.currentFrame->locals.max = INT_MAX; stack.currentFrame->locals.max = INT_MAX;
stack.currentFrame->args.instructionPtr += 5; stack.currentFrame->args.instructionPtr += 5;
break; break;
default: /* No repeat follows */ default: /* No repeat follows */
min = stack.currentFrame->locals.max = 1; min = stack.currentFrame->locals.max = 1;
break; break;
} }
/* First, ensure the minimum number of matches are present. */ /* First, ensure the minimum number of matches are present. */
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
...@@ -928,13 +919,13 @@ RECURSE: ...@@ -928,13 +919,13 @@ RECURSE:
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
} }
} }
/* If max == min we can continue with the main loop without the /* If max == min we can continue with the main loop without the
need to recurse. */ need to recurse. */
if (min == stack.currentFrame->locals.max) if (min == stack.currentFrame->locals.max)
NEXT_OPCODE; NEXT_OPCODE;
/* If minimizing, keep testing the rest of the expression and advancing /* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */ the pointer while it matches the class. */
if (minimize) { if (minimize) {
...@@ -958,7 +949,7 @@ RECURSE: ...@@ -958,7 +949,7 @@ RECURSE:
/* If maximizing, find the longest possible run, then work backwards. */ /* If maximizing, find the longest possible run, then work backwards. */
else { else {
stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
break; break;
...@@ -979,17 +970,17 @@ RECURSE: ...@@ -979,17 +970,17 @@ RECURSE:
if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
break; /* Stop if tried at original pos */ break; /* Stop if tried at original pos */
} }
RRETURN; RRETURN;
} }
/* Control never reaches here */ /* Control never reaches here */
/* Match an extended character class. */ /* Match an extended character class. */
BEGIN_OPCODE(XCLASS): BEGIN_OPCODE(XCLASS):
stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE; /* Save for matching */ stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE; /* Save for matching */
stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); /* Advance past the item */ stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); /* Advance past the item */
switch (*stack.currentFrame->args.instructionPtr) { switch (*stack.currentFrame->args.instructionPtr) {
case OP_CRSTAR: case OP_CRSTAR:
case OP_CRMINSTAR: case OP_CRMINSTAR:
...@@ -999,7 +990,7 @@ RECURSE: ...@@ -999,7 +990,7 @@ RECURSE:
case OP_CRMINQUERY: case OP_CRMINQUERY:
repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max); repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
break; break;
case OP_CRRANGE: case OP_CRRANGE:
case OP_CRMINRANGE: case OP_CRMINRANGE:
minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE); minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
...@@ -1009,13 +1000,13 @@ RECURSE: ...@@ -1009,13 +1000,13 @@ RECURSE:
stack.currentFrame->locals.max = INT_MAX; stack.currentFrame->locals.max = INT_MAX;
stack.currentFrame->args.instructionPtr += 5; stack.currentFrame->args.instructionPtr += 5;
break; break;
default: /* No repeat follows */ default: /* No repeat follows */
min = stack.currentFrame->locals.max = 1; min = stack.currentFrame->locals.max = 1;
} }
/* First, ensure the minimum number of matches are present. */ /* First, ensure the minimum number of matches are present. */
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
...@@ -1023,16 +1014,16 @@ RECURSE: ...@@ -1023,16 +1014,16 @@ RECURSE:
if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data)) if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
} }
/* If max == min we can continue with the main loop without the /* If max == min we can continue with the main loop without the
need to recurse. */ need to recurse. */
if (min == stack.currentFrame->locals.max) if (min == stack.currentFrame->locals.max)
NEXT_OPCODE; NEXT_OPCODE;
/* If minimizing, keep testing the rest of the expression and advancing /* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */ the pointer while it matches the class. */
if (minimize) { if (minimize) {
for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
...@@ -1046,9 +1037,9 @@ RECURSE: ...@@ -1046,9 +1037,9 @@ RECURSE:
} }
/* Control never reaches here */ /* Control never reaches here */
} }
/* If maximizing, find the longest possible run, then work backwards. */ /* If maximizing, find the longest possible run, then work backwards. */
else { else {
stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
...@@ -1068,11 +1059,11 @@ RECURSE: ...@@ -1068,11 +1059,11 @@ RECURSE:
} }
RRETURN; RRETURN;
} }
/* Control never reaches here */ /* Control never reaches here */
/* Match a single character, casefully */ /* Match a single character, casefully */
BEGIN_OPCODE(CHAR): BEGIN_OPCODE(CHAR):
stack.currentFrame->locals.length = 1; stack.currentFrame->locals.length = 1;
stack.currentFrame->args.instructionPtr++; stack.currentFrame->args.instructionPtr++;
...@@ -1083,9 +1074,9 @@ RECURSE: ...@@ -1083,9 +1074,9 @@ RECURSE:
if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++) if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
NEXT_OPCODE; NEXT_OPCODE;
/* Match a single character, caselessly */ /* Match a single character, caselessly */
BEGIN_OPCODE(CHAR_IGNORING_CASE): { BEGIN_OPCODE(CHAR_IGNORING_CASE): {
stack.currentFrame->locals.length = 1; stack.currentFrame->locals.length = 1;
stack.currentFrame->args.instructionPtr++; stack.currentFrame->args.instructionPtr++;
...@@ -1098,9 +1089,9 @@ RECURSE: ...@@ -1098,9 +1089,9 @@ RECURSE:
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
NEXT_OPCODE; NEXT_OPCODE;
} }
/* Match a single ASCII character. */ /* Match a single ASCII character. */
BEGIN_OPCODE(ASCII_CHAR): BEGIN_OPCODE(ASCII_CHAR):
if (md.endSubject == stack.currentFrame->args.subjectPtr) if (md.endSubject == stack.currentFrame->args.subjectPtr)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
...@@ -1109,9 +1100,9 @@ RECURSE: ...@@ -1109,9 +1100,9 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
stack.currentFrame->args.instructionPtr += 2; stack.currentFrame->args.instructionPtr += 2;
NEXT_OPCODE; NEXT_OPCODE;
/* Match one of two cases of an ASCII letter. */ /* Match one of two cases of an ASCII letter. */
BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE): BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
if (md.endSubject == stack.currentFrame->args.subjectPtr) if (md.endSubject == stack.currentFrame->args.subjectPtr)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
...@@ -1120,15 +1111,15 @@ RECURSE: ...@@ -1120,15 +1111,15 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
stack.currentFrame->args.instructionPtr += 2; stack.currentFrame->args.instructionPtr += 2;
NEXT_OPCODE; NEXT_OPCODE;
/* Match a single character repeatedly; different opcodes share code. */ /* Match a single character repeatedly; different opcodes share code. */
BEGIN_OPCODE(EXACT): BEGIN_OPCODE(EXACT):
min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
minimize = false; minimize = false;
stack.currentFrame->args.instructionPtr += 3; stack.currentFrame->args.instructionPtr += 3;
goto REPEATCHAR; goto REPEATCHAR;
BEGIN_OPCODE(UPTO): BEGIN_OPCODE(UPTO):
BEGIN_OPCODE(MINUPTO): BEGIN_OPCODE(MINUPTO):
min = 0; min = 0;
...@@ -1136,7 +1127,7 @@ RECURSE: ...@@ -1136,7 +1127,7 @@ RECURSE:
minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO; minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
stack.currentFrame->args.instructionPtr += 3; stack.currentFrame->args.instructionPtr += 3;
goto REPEATCHAR; goto REPEATCHAR;
BEGIN_OPCODE(STAR): BEGIN_OPCODE(STAR):
BEGIN_OPCODE(MINSTAR): BEGIN_OPCODE(MINSTAR):
BEGIN_OPCODE(PLUS): BEGIN_OPCODE(PLUS):
...@@ -1144,31 +1135,31 @@ RECURSE: ...@@ -1144,31 +1135,31 @@ RECURSE:
BEGIN_OPCODE(QUERY): BEGIN_OPCODE(QUERY):
BEGIN_OPCODE(MINQUERY): BEGIN_OPCODE(MINQUERY):
repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max); repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
/* Common code for all repeated single-character matches. We can give /* Common code for all repeated single-character matches. We can give
up quickly if there are fewer than the minimum number of characters left in up quickly if there are fewer than the minimum number of characters left in
the subject. */ the subject. */
REPEATCHAR: REPEATCHAR:
stack.currentFrame->locals.length = 1; stack.currentFrame->locals.length = 1;
getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length); getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr) if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length; stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
if (stack.currentFrame->locals.fc <= 0xFFFF) { if (stack.currentFrame->locals.fc <= 0xFFFF) {
int othercase = md.ignoreCase ? kjs_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1; int othercase = md.ignoreCase ? kjs_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase) if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
if (min == stack.currentFrame->locals.max) if (min == stack.currentFrame->locals.max)
NEXT_OPCODE; NEXT_OPCODE;
if (minimize) { if (minimize) {
stack.currentFrame->locals.repeatOthercase = othercase; stack.currentFrame->locals.repeatOthercase = othercase;
for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
...@@ -1202,16 +1193,16 @@ RECURSE: ...@@ -1202,16 +1193,16 @@ RECURSE:
/* Control never reaches here */ /* Control never reaches here */
} else { } else {
/* No case on surrogate pairs, so no need to bother with "othercase". */ /* No case on surrogate pairs, so no need to bother with "othercase". */
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc) if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->args.subjectPtr += 2; stack.currentFrame->args.subjectPtr += 2;
} }
if (min == stack.currentFrame->locals.max) if (min == stack.currentFrame->locals.max)
NEXT_OPCODE; NEXT_OPCODE;
if (minimize) { if (minimize) {
for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
...@@ -1244,9 +1235,9 @@ RECURSE: ...@@ -1244,9 +1235,9 @@ RECURSE:
/* Control never reaches here */ /* Control never reaches here */
} }
/* Control never reaches here */ /* Control never reaches here */
/* Match a negated single one-byte character. */ /* Match a negated single one-byte character. */
BEGIN_OPCODE(NOT): { BEGIN_OPCODE(NOT): {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
...@@ -1263,20 +1254,20 @@ RECURSE: ...@@ -1263,20 +1254,20 @@ RECURSE:
} }
NEXT_OPCODE; NEXT_OPCODE;
} }
/* Match a negated single one-byte character repeatedly. This is almost a /* Match a negated single one-byte character repeatedly. This is almost a
repeat of the code for a repeated single character, but I haven't found a repeat of the code for a repeated single character, but I haven't found a
nice way of commoning these up that doesn't require a test of the nice way of commoning these up that doesn't require a test of the
positive/negative option for each character match. Maybe that wouldn't add positive/negative option for each character match. Maybe that wouldn't add
very much to the time taken, but character matching *is* what this is all very much to the time taken, but character matching *is* what this is all
about... */ about... */
BEGIN_OPCODE(NOTEXACT): BEGIN_OPCODE(NOTEXACT):
min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
minimize = false; minimize = false;
stack.currentFrame->args.instructionPtr += 3; stack.currentFrame->args.instructionPtr += 3;
goto REPEATNOTCHAR; goto REPEATNOTCHAR;
BEGIN_OPCODE(NOTUPTO): BEGIN_OPCODE(NOTUPTO):
BEGIN_OPCODE(NOTMINUPTO): BEGIN_OPCODE(NOTMINUPTO):
min = 0; min = 0;
...@@ -1284,7 +1275,7 @@ RECURSE: ...@@ -1284,7 +1275,7 @@ RECURSE:
minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO; minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
stack.currentFrame->args.instructionPtr += 3; stack.currentFrame->args.instructionPtr += 3;
goto REPEATNOTCHAR; goto REPEATNOTCHAR;
BEGIN_OPCODE(NOTSTAR): BEGIN_OPCODE(NOTSTAR):
BEGIN_OPCODE(NOTMINSTAR): BEGIN_OPCODE(NOTMINSTAR):
BEGIN_OPCODE(NOTPLUS): BEGIN_OPCODE(NOTPLUS):
...@@ -1292,16 +1283,16 @@ RECURSE: ...@@ -1292,16 +1283,16 @@ RECURSE:
BEGIN_OPCODE(NOTQUERY): BEGIN_OPCODE(NOTQUERY):
BEGIN_OPCODE(NOTMINQUERY): BEGIN_OPCODE(NOTMINQUERY):
repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max); repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
/* Common code for all repeated single-byte matches. We can give up quickly /* Common code for all repeated single-byte matches. We can give up quickly
if there are fewer than the minimum number of bytes left in the if there are fewer than the minimum number of bytes left in the
subject. */ subject. */
REPEATNOTCHAR: REPEATNOTCHAR:
if (min > md.endSubject - stack.currentFrame->args.subjectPtr) if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++; stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
/* The code is duplicated for the caseless and caseful cases, for speed, /* The code is duplicated for the caseless and caseful cases, for speed,
since matching characters is likely to be quite common. First, ensure the since matching characters is likely to be quite common. First, ensure the
minimum number of matches are present. If min = max, continue at the same minimum number of matches are present. If min = max, continue at the same
...@@ -1309,13 +1300,13 @@ RECURSE: ...@@ -1309,13 +1300,13 @@ RECURSE:
the expression and advancing one matching character if failing, up to the the expression and advancing one matching character if failing, up to the
maximum. Alternatively, if maximizing, find the maximum number of maximum. Alternatively, if maximizing, find the maximum number of
characters and work backwards. */ characters and work backwards. */
DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max)); DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
if (md.ignoreCase) { if (md.ignoreCase) {
if (stack.currentFrame->locals.fc < 128) if (stack.currentFrame->locals.fc < 128)
stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc); stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
int d = *stack.currentFrame->args.subjectPtr++; int d = *stack.currentFrame->args.subjectPtr++;
if (d < 128) if (d < 128)
...@@ -1323,10 +1314,10 @@ RECURSE: ...@@ -1323,10 +1314,10 @@ RECURSE:
if (stack.currentFrame->locals.fc == d) if (stack.currentFrame->locals.fc == d)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
} }
if (min == stack.currentFrame->locals.max) if (min == stack.currentFrame->locals.max)
NEXT_OPCODE; NEXT_OPCODE;
if (minimize) { if (minimize) {
for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
...@@ -1340,12 +1331,12 @@ RECURSE: ...@@ -1340,12 +1331,12 @@ RECURSE:
} }
/* Control never reaches here */ /* Control never reaches here */
} }
/* Maximize case */ /* Maximize case */
else { else {
stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
break; break;
...@@ -1363,14 +1354,14 @@ RECURSE: ...@@ -1363,14 +1354,14 @@ RECURSE:
if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
break; /* Stop if tried at original pos */ break; /* Stop if tried at original pos */
} }
RRETURN; RRETURN;
} }
/* Control never reaches here */ /* Control never reaches here */
} }
/* Caseful comparisons */ /* Caseful comparisons */
else { else {
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
int d = *stack.currentFrame->args.subjectPtr++; int d = *stack.currentFrame->args.subjectPtr++;
...@@ -1380,7 +1371,7 @@ RECURSE: ...@@ -1380,7 +1371,7 @@ RECURSE:
if (min == stack.currentFrame->locals.max) if (min == stack.currentFrame->locals.max)
NEXT_OPCODE; NEXT_OPCODE;
if (minimize) { if (minimize) {
for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
...@@ -1392,12 +1383,12 @@ RECURSE: ...@@ -1392,12 +1383,12 @@ RECURSE:
} }
/* Control never reaches here */ /* Control never reaches here */
} }
/* Maximize case */ /* Maximize case */
else { else {
stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
break; break;
...@@ -1418,17 +1409,17 @@ RECURSE: ...@@ -1418,17 +1409,17 @@ RECURSE:
} }
} }
/* Control never reaches here */ /* Control never reaches here */
/* Match a single character type repeatedly; several different opcodes /* Match a single character type repeatedly; several different opcodes
share code. This is very similar to the code for single characters, but we share code. This is very similar to the code for single characters, but we
repeat it in the interests of efficiency. */ repeat it in the interests of efficiency. */
BEGIN_OPCODE(TYPEEXACT): BEGIN_OPCODE(TYPEEXACT):
min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1); min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
minimize = true; minimize = true;
stack.currentFrame->args.instructionPtr += 3; stack.currentFrame->args.instructionPtr += 3;
goto REPEATTYPE; goto REPEATTYPE;
BEGIN_OPCODE(TYPEUPTO): BEGIN_OPCODE(TYPEUPTO):
BEGIN_OPCODE(TYPEMINUPTO): BEGIN_OPCODE(TYPEMINUPTO):
min = 0; min = 0;
...@@ -1436,7 +1427,7 @@ RECURSE: ...@@ -1436,7 +1427,7 @@ RECURSE:
minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO; minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
stack.currentFrame->args.instructionPtr += 3; stack.currentFrame->args.instructionPtr += 3;
goto REPEATTYPE; goto REPEATTYPE;
BEGIN_OPCODE(TYPESTAR): BEGIN_OPCODE(TYPESTAR):
BEGIN_OPCODE(TYPEMINSTAR): BEGIN_OPCODE(TYPEMINSTAR):
BEGIN_OPCODE(TYPEPLUS): BEGIN_OPCODE(TYPEPLUS):
...@@ -1444,19 +1435,19 @@ RECURSE: ...@@ -1444,19 +1435,19 @@ RECURSE:
BEGIN_OPCODE(TYPEQUERY): BEGIN_OPCODE(TYPEQUERY):
BEGIN_OPCODE(TYPEMINQUERY): BEGIN_OPCODE(TYPEMINQUERY):
repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max); repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
/* Common code for all repeated single character type matches. Note that /* Common code for all repeated single character type matches. Note that
in UTF-8 mode, '.' matches a character of any length, but for the other in UTF-8 mode, '.' matches a character of any length, but for the other
character types, the valid characters are all one-byte long. */ character types, the valid characters are all one-byte long. */
REPEATTYPE: REPEATTYPE:
stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++; /* Code for the character type */ stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++; /* Code for the character type */
/* First, ensure the minimum number of matches are present. Use inline /* First, ensure the minimum number of matches are present. Use inline
code for maximizing the speed, and do the type test once at the start code for maximizing the speed, and do the type test once at the start
(i.e. keep it out of the loop). Also we can test that there are at least (i.e. keep it out of the loop). Also we can test that there are at least
the minimum number of characters before we start. */ the minimum number of characters before we start. */
if (min > md.endSubject - stack.currentFrame->args.subjectPtr) if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
RRETURN_NO_MATCH; RRETURN_NO_MATCH;
if (min > 0) { if (min > 0) {
...@@ -1468,7 +1459,7 @@ RECURSE: ...@@ -1468,7 +1459,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_NOT_DIGIT: case OP_NOT_DIGIT:
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (isASCIIDigit(*stack.currentFrame->args.subjectPtr)) if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
...@@ -1476,7 +1467,7 @@ RECURSE: ...@@ -1476,7 +1467,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_DIGIT: case OP_DIGIT:
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr)) if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
...@@ -1484,7 +1475,7 @@ RECURSE: ...@@ -1484,7 +1475,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_NOT_WHITESPACE: case OP_NOT_WHITESPACE:
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (isSpaceChar(*stack.currentFrame->args.subjectPtr)) if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
...@@ -1492,7 +1483,7 @@ RECURSE: ...@@ -1492,7 +1483,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_WHITESPACE: case OP_WHITESPACE:
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (!isSpaceChar(*stack.currentFrame->args.subjectPtr)) if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
...@@ -1500,7 +1491,7 @@ RECURSE: ...@@ -1500,7 +1491,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_NOT_WORDCHAR: case OP_NOT_WORDCHAR:
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (isWordChar(*stack.currentFrame->args.subjectPtr)) if (isWordChar(*stack.currentFrame->args.subjectPtr))
...@@ -1508,7 +1499,7 @@ RECURSE: ...@@ -1508,7 +1499,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_WORDCHAR: case OP_WORDCHAR:
for (int i = 1; i <= min; i++) { for (int i = 1; i <= min; i++) {
if (!isWordChar(*stack.currentFrame->args.subjectPtr)) if (!isWordChar(*stack.currentFrame->args.subjectPtr))
...@@ -1516,21 +1507,21 @@ RECURSE: ...@@ -1516,21 +1507,21 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
default: default:
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
return matchError(JSRegExpErrorInternal, stack); return matchError(JSRegExpErrorInternal, stack);
} /* End switch(stack.currentFrame->locals.ctype) */ } /* End switch(stack.currentFrame->locals.ctype) */
} }
/* If min = max, continue at the same level without recursing */ /* If min = max, continue at the same level without recursing */
if (min == stack.currentFrame->locals.max) if (min == stack.currentFrame->locals.max)
NEXT_OPCODE; NEXT_OPCODE;
/* If minimizing, we have to test the rest of the pattern before each /* If minimizing, we have to test the rest of the pattern before each
subsequent match. */ subsequent match. */
if (minimize) { if (minimize) {
for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) { for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
...@@ -1538,44 +1529,44 @@ RECURSE: ...@@ -1538,44 +1529,44 @@ RECURSE:
RRETURN; RRETURN;
if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
RRETURN; RRETURN;
int c = *stack.currentFrame->args.subjectPtr++; int c = *stack.currentFrame->args.subjectPtr++;
switch (stack.currentFrame->locals.ctype) { switch (stack.currentFrame->locals.ctype) {
case OP_NOT_NEWLINE: case OP_NOT_NEWLINE:
if (isNewline(c)) if (isNewline(c))
RRETURN; RRETURN;
break; break;
case OP_NOT_DIGIT: case OP_NOT_DIGIT:
if (isASCIIDigit(c)) if (isASCIIDigit(c))
RRETURN; RRETURN;
break; break;
case OP_DIGIT: case OP_DIGIT:
if (!isASCIIDigit(c)) if (!isASCIIDigit(c))
RRETURN; RRETURN;
break; break;
case OP_NOT_WHITESPACE: case OP_NOT_WHITESPACE:
if (isSpaceChar(c)) if (isSpaceChar(c))
RRETURN; RRETURN;
break; break;
case OP_WHITESPACE: case OP_WHITESPACE:
if (!isSpaceChar(c)) if (!isSpaceChar(c))
RRETURN; RRETURN;
break; break;
case OP_NOT_WORDCHAR: case OP_NOT_WORDCHAR:
if (isWordChar(c)) if (isWordChar(c))
RRETURN; RRETURN;
break; break;
case OP_WORDCHAR: case OP_WORDCHAR:
if (!isWordChar(c)) if (!isWordChar(c))
RRETURN; RRETURN;
break; break;
default: default:
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
return matchError(JSRegExpErrorInternal, stack); return matchError(JSRegExpErrorInternal, stack);
...@@ -1583,13 +1574,13 @@ RECURSE: ...@@ -1583,13 +1574,13 @@ RECURSE:
} }
/* Control never reaches here */ /* Control never reaches here */
} }
/* If maximizing it is worth using inline code for speed, doing the type /* If maximizing it is worth using inline code for speed, doing the type
test once at the start (i.e. keep it out of the loop). */ test once at the start (i.e. keep it out of the loop). */
else { else {
stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; /* Remember where we started */ stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; /* Remember where we started */
switch (stack.currentFrame->locals.ctype) { switch (stack.currentFrame->locals.ctype) {
case OP_NOT_NEWLINE: case OP_NOT_NEWLINE:
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
...@@ -1598,7 +1589,7 @@ RECURSE: ...@@ -1598,7 +1589,7 @@ RECURSE:
stack.currentFrame->args.subjectPtr++; stack.currentFrame->args.subjectPtr++;
} }
break; break;
case OP_NOT_DIGIT: case OP_NOT_DIGIT:
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
...@@ -1609,7 +1600,7 @@ RECURSE: ...@@ -1609,7 +1600,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_DIGIT: case OP_DIGIT:
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
...@@ -1620,7 +1611,7 @@ RECURSE: ...@@ -1620,7 +1611,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_NOT_WHITESPACE: case OP_NOT_WHITESPACE:
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
...@@ -1631,7 +1622,7 @@ RECURSE: ...@@ -1631,7 +1622,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_WHITESPACE: case OP_WHITESPACE:
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
...@@ -1642,7 +1633,7 @@ RECURSE: ...@@ -1642,7 +1633,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_NOT_WORDCHAR: case OP_NOT_WORDCHAR:
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
...@@ -1653,7 +1644,7 @@ RECURSE: ...@@ -1653,7 +1644,7 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
case OP_WORDCHAR: case OP_WORDCHAR:
for (int i = min; i < stack.currentFrame->locals.max; i++) { for (int i = min; i < stack.currentFrame->locals.max; i++) {
if (stack.currentFrame->args.subjectPtr >= md.endSubject) if (stack.currentFrame->args.subjectPtr >= md.endSubject)
...@@ -1664,14 +1655,14 @@ RECURSE: ...@@ -1664,14 +1655,14 @@ RECURSE:
++stack.currentFrame->args.subjectPtr; ++stack.currentFrame->args.subjectPtr;
} }
break; break;
default: default:
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
return matchError(JSRegExpErrorInternal, stack); return matchError(JSRegExpErrorInternal, stack);
} }
/* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */ /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
for (;;) { for (;;) {
RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
if (isMatch) if (isMatch)
...@@ -1679,13 +1670,13 @@ RECURSE: ...@@ -1679,13 +1670,13 @@ RECURSE:
if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
break; /* Stop if tried at original pos */ break; /* Stop if tried at original pos */
} }
/* Get here if we can't make it match with any permitted repetitions */ /* Get here if we can't make it match with any permitted repetitions */
RRETURN; RRETURN;
} }
/* Control never reaches here */ /* Control never reaches here */
BEGIN_OPCODE(CRMINPLUS): BEGIN_OPCODE(CRMINPLUS):
BEGIN_OPCODE(CRMINQUERY): BEGIN_OPCODE(CRMINQUERY):
BEGIN_OPCODE(CRMINRANGE): BEGIN_OPCODE(CRMINRANGE):
...@@ -1696,7 +1687,7 @@ RECURSE: ...@@ -1696,7 +1687,7 @@ RECURSE:
BEGIN_OPCODE(CRSTAR): BEGIN_OPCODE(CRSTAR):
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
return matchError(JSRegExpErrorInternal, stack); return matchError(JSRegExpErrorInternal, stack);
#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
CAPTURING_BRACKET: CAPTURING_BRACKET:
#else #else
...@@ -1707,71 +1698,71 @@ RECURSE: ...@@ -1707,71 +1698,71 @@ RECURSE:
mustn't change the current values of the data slot, because they may be set mustn't change the current values of the data slot, because they may be set
from a previous iteration of this group, and be referred to by a reference from a previous iteration of this group, and be referred to by a reference
inside the group. inside the group.
If the bracket fails to match, we need to restore this value and also the If the bracket fails to match, we need to restore this value and also the
values of the final offsets, in case they were set by a previous iteration of values of the final offsets, in case they were set by a previous iteration of
the same bracket. the same bracket.
If there isn't enough space in the offset vector, treat this as if it were a If there isn't enough space in the offset vector, treat this as if it were a
non-capturing bracket. Don't worry about setting the flag for the error case non-capturing bracket. Don't worry about setting the flag for the error case
here; that is handled in the code for KET. */ here; that is handled in the code for KET. */
ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA); ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA; stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
/* For extended extraction brackets (large number), we have to fish out the /* For extended extraction brackets (large number), we have to fish out the
number from a dummy opcode at the start. */ number from a dummy opcode at the start. */
if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX) if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 2 + LINK_SIZE); stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 2 + LINK_SIZE);
stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1; stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
#ifdef DEBUG #ifdef DEBUG
printf("start bracket %d subject=", stack.currentFrame->locals.number); printf("start bracket %d subject=", stack.currentFrame->locals.number);
pchars(stack.currentFrame->args.subjectPtr, 16, true, md); pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
printf("\n"); printf("\n");
#endif #endif
if (stack.currentFrame->locals.offset < md.offsetMax) { if (stack.currentFrame->locals.offset < md.offsetMax) {
stack.currentFrame->locals.saveOffset1 = md.offsetVector[stack.currentFrame->locals.offset]; stack.currentFrame->locals.saveOffset1 = md.offsetVector[stack.currentFrame->locals.offset];
stack.currentFrame->locals.saveOffset2 = md.offsetVector[stack.currentFrame->locals.offset + 1]; stack.currentFrame->locals.saveOffset2 = md.offsetVector[stack.currentFrame->locals.offset + 1];
stack.currentFrame->locals.saveOffset3 = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number]; stack.currentFrame->locals.saveOffset3 = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.saveOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.saveOffset3)); DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.saveOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.saveOffset3));
md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject; md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
do { do {
RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain); RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
if (isMatch) if (isMatch)
RRETURN; RRETURN;
stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
} while (*stack.currentFrame->args.instructionPtr == OP_ALT); } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number)); DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
md.offsetVector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.saveOffset1; md.offsetVector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.saveOffset1;
md.offsetVector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.saveOffset2; md.offsetVector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.saveOffset2;
md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.saveOffset3; md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.saveOffset3;
RRETURN; RRETURN;
} }
/* Insufficient room for saving captured contents */ /* Insufficient room for saving captured contents */
goto NON_CAPTURING_BRACKET; goto NON_CAPTURING_BRACKET;
} }
/* Do not stick any code in here without much thought; it is assumed /* Do not stick any code in here without much thought; it is assumed
that "continue" in the code above comes out to here to repeat the main that "continue" in the code above comes out to here to repeat the main
loop. */ loop. */
} /* End of main loop */ } /* End of main loop */
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
RRETURN_SWITCH: RRETURN_SWITCH:
switch (stack.currentFrame->returnLocation) { switch (stack.currentFrame->returnLocation) {
case 0: goto RETURN; case 0: goto RETURN;
...@@ -1802,12 +1793,12 @@ RRETURN_SWITCH: ...@@ -1802,12 +1793,12 @@ RRETURN_SWITCH:
case 48: goto RRETURN_48; case 48: goto RRETURN_48;
case 52: goto RRETURN_52; case 52: goto RRETURN_52;
} }
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
return matchError(JSRegExpErrorInternal, stack); return matchError(JSRegExpErrorInternal, stack);
#endif #endif
RETURN: RETURN:
return isMatch; return isMatch;
} }
...@@ -1837,13 +1828,12 @@ Returns: > 0 => success; value is the number of elements filled in ...@@ -1837,13 +1828,12 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem < -1 => some kind of unexpected problem
*/ */
template <typename Char> static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
static void tryFirstByteOptimization(const Char*& subjectPtr, const Char* endSubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOptimization, const Char* originalSubjectStart)
{ {
// If first_byte is set, try scanning to the first instance of that byte // If first_byte is set, try scanning to the first instance of that byte
// no need to try and match against any earlier part of the subject string. // no need to try and match against any earlier part of the subject string.
if (first_byte >= 0) { if (first_byte >= 0) {
Char first_char = first_byte; UChar first_char = first_byte;
if (first_byte_caseless) if (first_byte_caseless)
while (subjectPtr < endSubject) { while (subjectPtr < endSubject) {
int c = *subjectPtr; int c = *subjectPtr;
...@@ -1867,8 +1857,7 @@ static void tryFirstByteOptimization(const Char*& subjectPtr, const Char* endSub ...@@ -1867,8 +1857,7 @@ static void tryFirstByteOptimization(const Char*& subjectPtr, const Char* endSub
} }
} }
template <typename Char> static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByte, const UChar*& reqBytePtr)
static bool tryRequiredByteOptimization(const Char*& subjectPtr, const Char* endSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByte, const Char*& reqBytePtr)
{ {
/* If req_byte is set, we know that that character must appear in the subject /* If req_byte is set, we know that that character must appear in the subject
for the match to succeed. If the first character is set, req_byte must be for the match to succeed. If the first character is set, req_byte must be
...@@ -1877,7 +1866,7 @@ static bool tryRequiredByteOptimization(const Char*& subjectPtr, const Char* end ...@@ -1877,7 +1866,7 @@ static bool tryRequiredByteOptimization(const Char*& subjectPtr, const Char* end
unlimited repeats that aren't going to match. Writing separate code for unlimited repeats that aren't going to match. Writing separate code for
cased/caseless versions makes it go faster, as does using an autoincrement cased/caseless versions makes it go faster, as does using an autoincrement
and backing off on a match. and backing off on a match.
HOWEVER: when the subject string is very, very long, searching to its end can HOWEVER: when the subject string is very, very long, searching to its end can
take a long time, and give bad performance on quite ordinary patterns. This take a long time, and give bad performance on quite ordinary patterns. This
showed up when somebody was matching /^C/ on a 32-megabyte string... so we showed up when somebody was matching /^C/ on a 32-megabyte string... so we
...@@ -1885,7 +1874,7 @@ static bool tryRequiredByteOptimization(const Char*& subjectPtr, const Char* end ...@@ -1885,7 +1874,7 @@ static bool tryRequiredByteOptimization(const Char*& subjectPtr, const Char* end
*/ */
if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) { if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
const Char* p = subjectPtr + (hasFirstByte ? 1 : 0); const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
/* We don't need to repeat the search if we haven't yet reached the /* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */ place we found it at last time. */
...@@ -1923,31 +1912,30 @@ static bool tryRequiredByteOptimization(const Char*& subjectPtr, const Char* end ...@@ -1923,31 +1912,30 @@ static bool tryRequiredByteOptimization(const Char*& subjectPtr, const Char* end
return false; return false;
} }
template <typename Char>
int jsRegExpExecute(const JSRegExp* re, int jsRegExpExecute(const JSRegExp* re,
const Char* subject, int length, int start_offset, int* offsets, const UChar* subject, int length, int start_offset, int* offsets,
int offsetcount) int offsetcount)
{ {
ASSERT(re); ASSERT(re);
ASSERT(subject); ASSERT(subject);
ASSERT(offsetcount >= 0); ASSERT(offsetcount >= 0);
ASSERT(offsets || offsetcount == 0); ASSERT(offsets || offsetcount == 0);
MatchData<Char> matchBlock; MatchData matchBlock;
matchBlock.startSubject = subject; matchBlock.startSubject = subject;
matchBlock.endSubject = matchBlock.startSubject + length; matchBlock.endSubject = matchBlock.startSubject + length;
const Char* endSubject = matchBlock.endSubject; const UChar* endSubject = matchBlock.endSubject;
matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption); matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
matchBlock.ignoreCase = (re->options & IgnoreCaseOption); matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
/* If the expression has got more back references than the offsets supplied can /* If the expression has got more back references than the offsets supplied can
hold, we get a temporary chunk of working store to use during the matching. hold, we get a temporary chunk of working store to use during the matching.
Otherwise, we can use the vector supplied, rounding down its size to a multiple Otherwise, we can use the vector supplied, rounding down its size to a multiple
of 3. */ of 3. */
int ocount = offsetcount - (offsetcount % 3); int ocount = offsetcount - (offsetcount % 3);
// FIXME: This is lame that we have to second-guess our caller here. // FIXME: This is lame that we have to second-guess our caller here.
// The API should change to either fail-hard when we don't have enough offset space // The API should change to either fail-hard when we don't have enough offset space
// or that we shouldn't ask our callers to pre-allocate in the first place. // or that we shouldn't ask our callers to pre-allocate in the first place.
...@@ -1960,36 +1948,36 @@ int jsRegExpExecute(const JSRegExp* re, ...@@ -1960,36 +1948,36 @@ int jsRegExpExecute(const JSRegExp* re,
using_temporary_offsets = true; using_temporary_offsets = true;
} else } else
matchBlock.offsetVector = offsets; matchBlock.offsetVector = offsets;
matchBlock.offsetEnd = ocount; matchBlock.offsetEnd = ocount;
matchBlock.offsetMax = (2*ocount)/3; matchBlock.offsetMax = (2*ocount)/3;
matchBlock.offsetOverflow = false; matchBlock.offsetOverflow = false;
/* Compute the minimum number of offsets that we need to reset each time. Doing /* Compute the minimum number of offsets that we need to reset each time. Doing
this makes a huge difference to execution time when there aren't many brackets this makes a huge difference to execution time when there aren't many brackets
in the pattern. */ in the pattern. */
int resetcount = 2 + re->top_bracket * 2; int resetcount = 2 + re->top_bracket * 2;
if (resetcount > offsetcount) if (resetcount > offsetcount)
resetcount = ocount; resetcount = ocount;
/* Reset the working variable associated with each extraction. These should /* Reset the working variable associated with each extraction. These should
never be used unless previously set, but they get saved and restored, and so we never be used unless previously set, but they get saved and restored, and so we
initialize them to avoid reading uninitialized locations. */ initialize them to avoid reading uninitialized locations. */
if (matchBlock.offsetVector) { if (matchBlock.offsetVector) {
int* iptr = matchBlock.offsetVector + ocount; int* iptr = matchBlock.offsetVector + ocount;
int* iend = iptr - resetcount/2 + 1; int* iend = iptr - resetcount/2 + 1;
while (--iptr >= iend) while (--iptr >= iend)
*iptr = -1; *iptr = -1;
} }
/* Set up the first character to match, if available. The first_byte value is /* Set up the first character to match, if available. The first_byte value is
never set for an anchored regular expression, but the anchoring may be forced never set for an anchored regular expression, but the anchoring may be forced
at run time, so we have to test for anchoring. The first char may be unset for at run time, so we have to test for anchoring. The first char may be unset for
an unanchored pattern, of course. If there's no first char and the pattern was an unanchored pattern, of course. If there's no first char and the pattern was
studied, there may be a bitmap of possible first characters. */ studied, there may be a bitmap of possible first characters. */
bool first_byte_caseless = false; bool first_byte_caseless = false;
int first_byte = -1; int first_byte = -1;
if (re->options & UseFirstByteOptimizationOption) { if (re->options & UseFirstByteOptimizationOption) {
...@@ -1997,10 +1985,10 @@ int jsRegExpExecute(const JSRegExp* re, ...@@ -1997,10 +1985,10 @@ int jsRegExpExecute(const JSRegExp* re,
if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE))) if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
first_byte = toLowerCase(first_byte); first_byte = toLowerCase(first_byte);
} }
/* For anchored or unanchored matches, there may be a "last known required /* For anchored or unanchored matches, there may be a "last known required
character" set. */ character" set. */
bool req_byte_caseless = false; bool req_byte_caseless = false;
int req_byte = -1; int req_byte = -1;
int req_byte2 = -1; int req_byte2 = -1;
...@@ -2009,14 +1997,14 @@ int jsRegExpExecute(const JSRegExp* re, ...@@ -2009,14 +1997,14 @@ int jsRegExpExecute(const JSRegExp* re,
req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE); req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
req_byte2 = flipCase(req_byte); req_byte2 = flipCase(req_byte);
} }
/* Loop for handling unanchored repeated matching attempts; for anchored regexs /* Loop for handling unanchored repeated matching attempts; for anchored regexs
the loop runs just once. */ the loop runs just once. */
const Char* startMatch = subject + start_offset; const UChar* startMatch = subject + start_offset;
const Char* reqBytePtr = startMatch - 1; const UChar* reqBytePtr = startMatch - 1;
bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption; bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
do { do {
/* Reset the maximum number of extractions we might see. */ /* Reset the maximum number of extractions we might see. */
if (matchBlock.offsetVector) { if (matchBlock.offsetVector) {
...@@ -2025,23 +2013,23 @@ int jsRegExpExecute(const JSRegExp* re, ...@@ -2025,23 +2013,23 @@ int jsRegExpExecute(const JSRegExp* re,
while (iptr < iend) while (iptr < iend)
*iptr++ = -1; *iptr++ = -1;
} }
tryFirstByteOptimization(startMatch, endSubject, first_byte, first_byte_caseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset); tryFirstByteOptimization(startMatch, endSubject, first_byte, first_byte_caseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
if (tryRequiredByteOptimization(startMatch, endSubject, req_byte, req_byte2, req_byte_caseless, first_byte >= 0, reqBytePtr)) if (tryRequiredByteOptimization(startMatch, endSubject, req_byte, req_byte2, req_byte_caseless, first_byte >= 0, reqBytePtr))
break; break;
/* When a match occurs, substrings will be set for all internal extractions; /* When a match occurs, substrings will be set for all internal extractions;
we just need to set up the whole thing as substring 0 before returning. If we just need to set up the whole thing as substring 0 before returning. If
there were too many extractions, set the return code to zero. In the case there were too many extractions, set the return code to zero. In the case
where we had to get some local store to hold offsets for backreferences, copy where we had to get some local store to hold offsets for backreferences, copy
those back references that we can. In this case there need not be overflow those back references that we can. In this case there need not be overflow
if certain parts of the pattern were not used. */ if certain parts of the pattern were not used. */
/* The code starts after the JSRegExp block and the capture name table. */ /* The code starts after the JSRegExp block and the capture name table. */
const unsigned char* start_code = (const unsigned char*)(re + 1); const unsigned char* start_code = (const unsigned char*)(re + 1);
int returnCode = match<Char>(startMatch, start_code, 2, matchBlock); int returnCode = match(startMatch, start_code, 2, matchBlock);
/* When the result is no match, advance the pointer to the next character /* When the result is no match, advance the pointer to the next character
and continue. */ and continue. */
if (returnCode == 0) { if (returnCode == 0) {
...@@ -2054,10 +2042,10 @@ int jsRegExpExecute(const JSRegExp* re, ...@@ -2054,10 +2042,10 @@ int jsRegExpExecute(const JSRegExp* re,
DPRINTF((">>>> error: returning %d\n", returnCode)); DPRINTF((">>>> error: returning %d\n", returnCode));
return returnCode; return returnCode;
} }
/* We have a match! Copy the offset information from temporary store if /* We have a match! Copy the offset information from temporary store if
necessary */ necessary */
if (using_temporary_offsets) { if (using_temporary_offsets) {
if (offsetcount >= 4) { if (offsetcount >= 4) {
memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetcount - 2) * sizeof(int)); memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetcount - 2) * sizeof(int));
...@@ -2065,41 +2053,29 @@ int jsRegExpExecute(const JSRegExp* re, ...@@ -2065,41 +2053,29 @@ int jsRegExpExecute(const JSRegExp* re,
} }
if (matchBlock.endOffsetTop > offsetcount) if (matchBlock.endOffsetTop > offsetcount)
matchBlock.offsetOverflow = true; matchBlock.offsetOverflow = true;
DPRINTF(("Freeing temporary memory\n")); DPRINTF(("Freeing temporary memory\n"));
delete [] matchBlock.offsetVector; delete [] matchBlock.offsetVector;
} }
returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2; returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
if (offsetcount < 2) if (offsetcount < 2)
returnCode = 0; returnCode = 0;
else { else {
offsets[0] = startMatch - matchBlock.startSubject; offsets[0] = startMatch - matchBlock.startSubject;
offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject; offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
} }
DPRINTF((">>>> returning %d\n", returnCode)); DPRINTF((">>>> returning %d\n", returnCode));
return returnCode; return returnCode;
} while (!(re->options & IsAnchoredOption) && startMatch <= endSubject); } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
if (using_temporary_offsets) { if (using_temporary_offsets) {
DPRINTF(("Freeing temporary memory\n")); DPRINTF(("Freeing temporary memory\n"));
delete [] matchBlock.offsetVector; delete [] matchBlock.offsetVector;
} }
DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n")); DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
return JSRegExpErrorNoMatch; return JSRegExpErrorNoMatch;
} }
template
int jsRegExpExecute<unsigned short>(const JSRegExp* re,
const unsigned short* subject,
int length, int start_offset,
int* offsets, int offsetcount);
template
int jsRegExpExecute<char>(const JSRegExp* re,
const char* subject,
int length, int start_offset,
int* offsets, int offsetcount);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment