Commit 06536894 authored by lrn@chromium.org

Fix Chromium issue 47824.

In rare cases a two-byte string was mistaken for an ascii-string.

Review URL: http://codereview.chromium.org/2858033

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4985 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent a23a17bc
...@@ -197,7 +197,17 @@ void TransformToFastProperties(Handle<JSObject> object, ...@@ -197,7 +197,17 @@ void TransformToFastProperties(Handle<JSObject> object,
void FlattenString(Handle<String> string) { void FlattenString(Handle<String> string) {
CALL_HEAP_FUNCTION_VOID(string->TryFlatten()); CALL_HEAP_FUNCTION_VOID(string->TryFlatten());
}
Handle<String> FlattenGetString(Handle<String> string) {
Handle<String> result;
CALL_AND_RETRY(string->TryFlatten(),
{ result = Handle<String>(String::cast(__object__));
break; },
return Handle<String>());
ASSERT(string->IsFlat()); ASSERT(string->IsFlat());
return result;
} }
......
...@@ -193,8 +193,14 @@ void NormalizeProperties(Handle<JSObject> object, ...@@ -193,8 +193,14 @@ void NormalizeProperties(Handle<JSObject> object,
void NormalizeElements(Handle<JSObject> object); void NormalizeElements(Handle<JSObject> object);
void TransformToFastProperties(Handle<JSObject> object, void TransformToFastProperties(Handle<JSObject> object,
int unused_property_fields); int unused_property_fields);
// Flattens a string.
void FlattenString(Handle<String> str); void FlattenString(Handle<String> str);
// Flattens a string and returns the underlying external or sequential
// string.
Handle<String> FlattenGetString(Handle<String> str);
Handle<Object> SetProperty(Handle<JSObject> object, Handle<Object> SetProperty(Handle<JSObject> object,
Handle<String> key, Handle<String> key,
Handle<Object> value, Handle<Object> value,
......
...@@ -356,7 +356,16 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, ...@@ -356,7 +356,16 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
if (!subject->IsFlat()) { if (!subject->IsFlat()) {
FlattenString(subject); FlattenString(subject);
} }
bool is_ascii = subject->IsAsciiRepresentation(); // Check the asciiness of the underlying storage.
bool is_ascii;
{
AssertNoAllocation no_gc;
String* sequential_string = *subject;
if (subject->IsConsString()) {
sequential_string = ConsString::cast(*subject)->first();
}
is_ascii = sequential_string->IsAsciiRepresentation();
}
if (!EnsureCompiledIrregexp(regexp, is_ascii)) { if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
return -1; return -1;
} }
...@@ -381,6 +390,11 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, ...@@ -381,6 +390,11 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
ASSERT(index <= subject->length()); ASSERT(index <= subject->length());
ASSERT(subject->IsFlat()); ASSERT(subject->IsFlat());
// A flat ASCII string might have a two-byte first part.
if (subject->IsConsString()) {
subject = Handle<String>(ConsString::cast(*subject)->first());
}
#ifndef V8_INTERPRETED_REGEXP #ifndef V8_INTERPRETED_REGEXP
ASSERT(output.length() >= ASSERT(output.length() >=
(IrregexpNumberOfCaptures(*irregexp) + 1) * 2); (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
...@@ -407,7 +421,7 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, ...@@ -407,7 +421,7 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
// If result is RETRY, the string has changed representation, and we // If result is RETRY, the string has changed representation, and we
// must restart from scratch. // must restart from scratch.
// In this case, it means we must make sure we are prepared to handle // In this case, it means we must make sure we are prepared to handle
// the, potentially, differen subject (the string can switch between // the, potentially, different subject (the string can switch between
// being internal and external, and even between being ASCII and UC16, // being internal and external, and even between being ASCII and UC16,
// but the characters are always the same). // but the characters are always the same).
IrregexpPrepare(regexp, subject); IrregexpPrepare(regexp, subject);
......
...@@ -678,7 +678,7 @@ Object* String::SlowTryFlatten(PretenureFlag pretenure) { ...@@ -678,7 +678,7 @@ Object* String::SlowTryFlatten(PretenureFlag pretenure) {
bool String::MakeExternal(v8::String::ExternalStringResource* resource) { bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
// Externalizing twice leaks the external resouce, so it's // Externalizing twice leaks the external resource, so it's
// prohibited by the API. // prohibited by the API.
ASSERT(!this->IsExternalString()); ASSERT(!this->IsExternalString());
#ifdef DEBUG #ifdef DEBUG
......
...@@ -120,8 +120,6 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( ...@@ -120,8 +120,6 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
int start_offset = previous_index; int start_offset = previous_index;
int end_offset = subject_ptr->length(); int end_offset = subject_ptr->length();
bool is_ascii = subject->IsAsciiRepresentation();
// The string has been flattened, so if it is a cons string it contains the // The string has been flattened, so if it is a cons string it contains the
// full string in the first part. // full string in the first part.
if (StringShape(subject_ptr).IsCons()) { if (StringShape(subject_ptr).IsCons()) {
...@@ -129,7 +127,7 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( ...@@ -129,7 +127,7 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
subject_ptr = ConsString::cast(subject_ptr)->first(); subject_ptr = ConsString::cast(subject_ptr)->first();
} }
// Ensure that an underlying string has the same ascii-ness. // Ensure that an underlying string has the same ascii-ness.
ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii); bool is_ascii = subject_ptr->IsAsciiRepresentation();
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString()); ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
// String is now either Sequential or External // String is now either Sequential or External
int char_size_shift = is_ascii ? 0 : 1; int char_size_shift = is_ascii ? 0 : 1;
......
...@@ -2782,13 +2782,17 @@ int Runtime::StringMatch(Handle<String> sub, ...@@ -2782,13 +2782,17 @@ int Runtime::StringMatch(Handle<String> sub,
// algorithm is unnecessary overhead. // algorithm is unnecessary overhead.
if (pattern_length == 1) { if (pattern_length == 1) {
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
if (sub->IsAsciiRepresentation()) { String* seq_sub = *sub;
if (seq_sub->IsConsString()) {
seq_sub = ConsString::cast(seq_sub)->first();
}
if (seq_sub->IsAsciiRepresentation()) {
uc16 pchar = pat->Get(0); uc16 pchar = pat->Get(0);
if (pchar > String::kMaxAsciiCharCode) { if (pchar > String::kMaxAsciiCharCode) {
return -1; return -1;
} }
Vector<const char> ascii_vector = Vector<const char> ascii_vector =
sub->ToAsciiVector().SubVector(start_index, subject_length); seq_sub->ToAsciiVector().SubVector(start_index, subject_length);
const void* pos = memchr(ascii_vector.start(), const void* pos = memchr(ascii_vector.start(),
static_cast<const char>(pchar), static_cast<const char>(pchar),
static_cast<size_t>(ascii_vector.length())); static_cast<size_t>(ascii_vector.length()));
...@@ -2798,7 +2802,9 @@ int Runtime::StringMatch(Handle<String> sub, ...@@ -2798,7 +2802,9 @@ int Runtime::StringMatch(Handle<String> sub,
return static_cast<int>(reinterpret_cast<const char*>(pos) return static_cast<int>(reinterpret_cast<const char*>(pos)
- ascii_vector.start() + start_index); - ascii_vector.start() + start_index);
} }
return SingleCharIndexOf(sub->ToUC16Vector(), pat->Get(0), start_index); return SingleCharIndexOf(seq_sub->ToUC16Vector(),
pat->Get(0),
start_index);
} }
if (!pat->IsFlat()) { if (!pat->IsFlat()) {
...@@ -2806,19 +2812,29 @@ int Runtime::StringMatch(Handle<String> sub, ...@@ -2806,19 +2812,29 @@ int Runtime::StringMatch(Handle<String> sub,
} }
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining asciiness.
String* seq_sub = *sub;
if (seq_sub->IsConsString()) {
seq_sub = ConsString::cast(seq_sub)->first();
}
String* seq_pat = *pat;
if (seq_pat->IsConsString()) {
seq_pat = ConsString::cast(seq_pat)->first();
}
// dispatch on type of strings // dispatch on type of strings
if (pat->IsAsciiRepresentation()) { if (seq_pat->IsAsciiRepresentation()) {
Vector<const char> pat_vector = pat->ToAsciiVector(); Vector<const char> pat_vector = seq_pat->ToAsciiVector();
if (sub->IsAsciiRepresentation()) { if (seq_sub->IsAsciiRepresentation()) {
return StringSearch(sub->ToAsciiVector(), pat_vector, start_index); return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index);
} }
return StringSearch(sub->ToUC16Vector(), pat_vector, start_index); return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index);
} }
Vector<const uc16> pat_vector = pat->ToUC16Vector(); Vector<const uc16> pat_vector = seq_pat->ToUC16Vector();
if (sub->IsAsciiRepresentation()) { if (seq_sub->IsAsciiRepresentation()) {
return StringSearch(sub->ToAsciiVector(), pat_vector, start_index); return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index);
} }
return StringSearch(sub->ToUC16Vector(), pat_vector, start_index); return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index);
} }
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment