Commit 06536894 authored by lrn@chromium.org

Fix Chromium issue 47824.

In rare cases a two-byte string was mistaken for an ascii-string.

Review URL: http://codereview.chromium.org/2858033

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4985 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent a23a17bc
......@@ -197,7 +197,17 @@ void TransformToFastProperties(Handle<JSObject> object,
// Requests flattening of |string| by calling String::TryFlatten() through
// CALL_HEAP_FUNCTION_VOID. The result of the call is not handed back to the
// caller; use FlattenGetString when the flattened string object is needed.
// NOTE(review): the macro presumably deals with allocation failure (e.g. by
// retrying) -- confirm against its definition.
void FlattenString(Handle<String> string) {
CALL_HEAP_FUNCTION_VOID(string->TryFlatten());
}
// Flattens |string| and returns a handle to the object produced by
// String::TryFlatten(), cast to String. The declaration documents that object
// as the underlying external or sequential string.
// One arm of the macro below returns an empty Handle<String>() instead.
Handle<String> FlattenGetString(Handle<String> string) {
Handle<String> result;
// The second macro argument wraps the object yielded by TryFlatten()
// (exposed by the macro as __object__) in a handle and then uses `break`,
// presumably to leave the macro's retry construct -- confirm against
// CALL_AND_RETRY. The third argument is the alternative arm (presumably
// the failure path) and returns an empty handle straight from this function.
CALL_AND_RETRY(string->TryFlatten(),
{ result = Handle<String>(String::cast(__object__));
break; },
return Handle<String>());
// When control reaches this point the input string itself is expected to be
// flat, in addition to |result| having been set.
ASSERT(string->IsFlat());
return result;
}
......
......@@ -193,8 +193,14 @@ void NormalizeProperties(Handle<JSObject> object,
void NormalizeElements(Handle<JSObject> object);
void TransformToFastProperties(Handle<JSObject> object,
int unused_property_fields);
// Flattens a string. Nothing is returned; see FlattenGetString for a variant
// that hands back the flattened string.
void FlattenString(Handle<String> str);
// Flattens a string and returns the underlying external or sequential
// string.
// NOTE(review): the implementation has a path that returns an empty handle,
// so callers may need to check for it -- confirm.
Handle<String> FlattenGetString(Handle<String> str);
Handle<Object> SetProperty(Handle<JSObject> object,
Handle<String> key,
Handle<Object> value,
......
......@@ -356,7 +356,16 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
if (!subject->IsFlat()) {
FlattenString(subject);
}
bool is_ascii = subject->IsAsciiRepresentation();
// Check the asciiness of the underlying storage.
bool is_ascii;
{
AssertNoAllocation no_gc;
String* sequential_string = *subject;
if (subject->IsConsString()) {
sequential_string = ConsString::cast(*subject)->first();
}
is_ascii = sequential_string->IsAsciiRepresentation();
}
if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
return -1;
}
......@@ -381,6 +390,11 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
ASSERT(index <= subject->length());
ASSERT(subject->IsFlat());
// A flat ASCII string might have a two-byte first part.
if (subject->IsConsString()) {
subject = Handle<String>(ConsString::cast(*subject)->first());
}
#ifndef V8_INTERPRETED_REGEXP
ASSERT(output.length() >=
(IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
......@@ -407,7 +421,7 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
// If result is RETRY, the string has changed representation, and we
// must restart from scratch.
// In this case, it means we must make sure we are prepared to handle
// the, potentially, differen subject (the string can switch between
// the, potentially, different subject (the string can switch between
// being internal and external, and even between being ASCII and UC16,
// but the characters are always the same).
IrregexpPrepare(regexp, subject);
......
......@@ -678,7 +678,7 @@ Object* String::SlowTryFlatten(PretenureFlag pretenure) {
bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
// Externalizing twice leaks the external resouce, so it's
// Externalizing twice leaks the external resource, so it's
// prohibited by the API.
ASSERT(!this->IsExternalString());
#ifdef DEBUG
......
......@@ -120,8 +120,6 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
int start_offset = previous_index;
int end_offset = subject_ptr->length();
bool is_ascii = subject->IsAsciiRepresentation();
// The string has been flattened, so if it is a cons string it contains the
// full string in the first part.
if (StringShape(subject_ptr).IsCons()) {
......@@ -129,7 +127,7 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
subject_ptr = ConsString::cast(subject_ptr)->first();
}
// Ensure that an underlying string has the same ascii-ness.
ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
bool is_ascii = subject_ptr->IsAsciiRepresentation();
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
// String is now either Sequential or External
int char_size_shift = is_ascii ? 0 : 1;
......
......@@ -2782,13 +2782,17 @@ int Runtime::StringMatch(Handle<String> sub,
// algorithm is unnecessary overhead.
if (pattern_length == 1) {
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
if (sub->IsAsciiRepresentation()) {
String* seq_sub = *sub;
if (seq_sub->IsConsString()) {
seq_sub = ConsString::cast(seq_sub)->first();
}
if (seq_sub->IsAsciiRepresentation()) {
uc16 pchar = pat->Get(0);
if (pchar > String::kMaxAsciiCharCode) {
return -1;
}
Vector<const char> ascii_vector =
sub->ToAsciiVector().SubVector(start_index, subject_length);
seq_sub->ToAsciiVector().SubVector(start_index, subject_length);
const void* pos = memchr(ascii_vector.start(),
static_cast<const char>(pchar),
static_cast<size_t>(ascii_vector.length()));
......@@ -2798,7 +2802,9 @@ int Runtime::StringMatch(Handle<String> sub,
return static_cast<int>(reinterpret_cast<const char*>(pos)
- ascii_vector.start() + start_index);
}
return SingleCharIndexOf(sub->ToUC16Vector(), pat->Get(0), start_index);
return SingleCharIndexOf(seq_sub->ToUC16Vector(),
pat->Get(0),
start_index);
}
if (!pat->IsFlat()) {
......@@ -2806,19 +2812,29 @@ int Runtime::StringMatch(Handle<String> sub,
}
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining asciiness.
String* seq_sub = *sub;
if (seq_sub->IsConsString()) {
seq_sub = ConsString::cast(seq_sub)->first();
}
String* seq_pat = *pat;
if (seq_pat->IsConsString()) {
seq_pat = ConsString::cast(seq_pat)->first();
}
// dispatch on type of strings
if (pat->IsAsciiRepresentation()) {
Vector<const char> pat_vector = pat->ToAsciiVector();
if (sub->IsAsciiRepresentation()) {
return StringSearch(sub->ToAsciiVector(), pat_vector, start_index);
if (seq_pat->IsAsciiRepresentation()) {
Vector<const char> pat_vector = seq_pat->ToAsciiVector();
if (seq_sub->IsAsciiRepresentation()) {
return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index);
}
return StringSearch(sub->ToUC16Vector(), pat_vector, start_index);
return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index);
}
Vector<const uc16> pat_vector = pat->ToUC16Vector();
if (sub->IsAsciiRepresentation()) {
return StringSearch(sub->ToAsciiVector(), pat_vector, start_index);
Vector<const uc16> pat_vector = seq_pat->ToUC16Vector();
if (seq_sub->IsAsciiRepresentation()) {
return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index);
}
return StringSearch(sub->ToUC16Vector(), pat_vector, start_index);
return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index);
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment