Commit 9eb7d4a5 authored by lrn@chromium.org

Replace ToAsciiVector and ToUC16Vector with single function that returns a tagged value.

The tag indicates whether the content is ASCII or UC16, or that the string was not flat and therefore has no flat content.

BUG: v8:1633

Review URL: http://codereview.chromium.org/7709024

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8999 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent bf4222fb
...@@ -617,15 +617,17 @@ Handle<FixedArray> CalculateLineEnds(Handle<String> src, ...@@ -617,15 +617,17 @@ Handle<FixedArray> CalculateLineEnds(Handle<String> src,
{ {
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid. AssertNoAllocation no_heap_allocation; // ensure vectors stay valid.
// Dispatch on type of strings. // Dispatch on type of strings.
if (src->IsAsciiRepresentation()) { String::FlatContent content = src->GetFlatContent(no_heap_allocation);
ASSERT(content.IsFlat());
if (content.IsAscii()) {
CalculateLineEnds(isolate, CalculateLineEnds(isolate,
&line_ends, &line_ends,
src->ToAsciiVector(), content.ToAsciiVector(),
with_last_line); with_last_line);
} else { } else {
CalculateLineEnds(isolate, CalculateLineEnds(isolate,
&line_ends, &line_ends,
src->ToUC16Vector(), content.ToUC16Vector(),
with_last_line); with_last_line);
} }
} }
......
...@@ -778,7 +778,8 @@ void HHasInstanceTypeAndBranch::PrintDataTo(StringStream* stream) { ...@@ -778,7 +778,8 @@ void HHasInstanceTypeAndBranch::PrintDataTo(StringStream* stream) {
void HTypeofIsAndBranch::PrintDataTo(StringStream* stream) { void HTypeofIsAndBranch::PrintDataTo(StringStream* stream) {
value()->PrintNameTo(stream); value()->PrintNameTo(stream);
stream->Add(" == "); stream->Add(" == ");
stream->Add(type_literal_->ToAsciiVector()); AssertNoAllocation no_alloc;
stream->Add(type_literal_->GetFlatContent(no_alloc).ToAsciiVector());
} }
......
// Copyright 2008 the V8 project authors. All rights reserved. // Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without // Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are // modification, are permitted provided that the following conditions are
// met: // met:
...@@ -635,8 +635,9 @@ bool IrregexpInterpreter::Match(Isolate* isolate, ...@@ -635,8 +635,9 @@ bool IrregexpInterpreter::Match(Isolate* isolate,
AssertNoAllocation a; AssertNoAllocation a;
const byte* code_base = code_array->GetDataStartAddress(); const byte* code_base = code_array->GetDataStartAddress();
uc16 previous_char = '\n'; uc16 previous_char = '\n';
if (subject->IsAsciiRepresentation()) { String::FlatContent subject_content = subject->GetFlatContent(a);
Vector<const char> subject_vector = subject->ToAsciiVector(); if (subject_content.IsAscii()) {
Vector<const char> subject_vector = subject_content.ToAsciiVector();
if (start_position != 0) previous_char = subject_vector[start_position - 1]; if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate, return RawMatch(isolate,
code_base, code_base,
...@@ -645,7 +646,8 @@ bool IrregexpInterpreter::Match(Isolate* isolate, ...@@ -645,7 +646,8 @@ bool IrregexpInterpreter::Match(Isolate* isolate,
start_position, start_position,
previous_char); previous_char);
} else { } else {
Vector<const uc16> subject_vector = subject->ToUC16Vector(); ASSERT(subject_content.IsTwoByte());
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (start_position != 0) previous_char = subject_vector[start_position - 1]; if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate, return RawMatch(isolate,
code_base, code_base,
......
...@@ -212,19 +212,7 @@ static void SetAtomLastCapture(FixedArray* array, ...@@ -212,19 +212,7 @@ static void SetAtomLastCapture(FixedArray* array,
RegExpImpl::SetCapture(array, 1, to); RegExpImpl::SetCapture(array, 1, to);
} }
/* template <typename SubjectChar>, typename PatternChar>
static int ReStringMatch(Vector<const SubjectChar> sub_vector,
Vector<const PatternChar> pat_vector,
int start_index) {
int pattern_length = pat_vector.length();
if (pattern_length == 0) return start_index;
int subject_length = sub_vector.length();
if (start_index + pattern_length > subject_length) return -1;
return SearchString(sub_vector, pat_vector, start_index);
}
*/
Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
Handle<String> subject, Handle<String> subject,
int index, int index,
...@@ -237,35 +225,41 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, ...@@ -237,35 +225,41 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
if (!subject->IsFlat()) FlattenString(subject); if (!subject->IsFlat()) FlattenString(subject);
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining asciiness. // Extract flattened substrings of cons strings before determining asciiness.
String* seq_sub = *subject;
if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
int needle_len = needle->length(); int needle_len = needle->length();
ASSERT(needle->IsFlat());
if (needle_len != 0) { if (needle_len != 0) {
if (index + needle_len > subject->length()) if (index + needle_len > subject->length()) {
return isolate->factory()->null_value(); return isolate->factory()->null_value();
}
String::FlatContent needle_content =
needle->GetFlatContent(no_heap_allocation);
String::FlatContent subject_content =
subject->GetFlatContent(no_heap_allocation);
ASSERT(needle_content.IsFlat());
ASSERT(subject_content.IsFlat());
// dispatch on type of strings // dispatch on type of strings
index = (needle->IsAsciiRepresentation() index = (needle_content.IsAscii()
? (seq_sub->IsAsciiRepresentation() ? (subject_content.IsAscii()
? SearchString(isolate, ? SearchString(isolate,
seq_sub->ToAsciiVector(), subject_content.ToAsciiVector(),
needle->ToAsciiVector(), needle_content.ToAsciiVector(),
index) index)
: SearchString(isolate, : SearchString(isolate,
seq_sub->ToUC16Vector(), subject_content.ToUC16Vector(),
needle->ToAsciiVector(), needle_content.ToAsciiVector(),
index)) index))
: (seq_sub->IsAsciiRepresentation() : (subject_content.IsAscii()
? SearchString(isolate, ? SearchString(isolate,
seq_sub->ToAsciiVector(), subject_content.ToAsciiVector(),
needle->ToUC16Vector(), needle_content.ToUC16Vector(),
index) index)
: SearchString(isolate, : SearchString(isolate,
seq_sub->ToUC16Vector(), subject_content.ToUC16Vector(),
needle->ToUC16Vector(), needle_content.ToUC16Vector(),
index))); index)));
if (index == -1) return isolate->factory()->null_value(); if (index == -1) return isolate->factory()->null_value();
} }
......
...@@ -297,6 +297,11 @@ StringRepresentationTag StringShape::representation_tag() { ...@@ -297,6 +297,11 @@ StringRepresentationTag StringShape::representation_tag() {
} }
// Returns only the encoding bits of the instance type, obtained by masking
// type_ with kStringEncodingMask. The result is meant to be compared against
// the encoding tags (kAsciiStringTag / kTwoByteStringTag).
uint32_t StringShape::encoding_tag() {
  const uint32_t encoding_bits = type_ & kStringEncodingMask;
  return encoding_bits;
}
uint32_t StringShape::full_representation_tag() { uint32_t StringShape::full_representation_tag() {
return (type_ & (kStringRepresentationMask | kStringEncodingMask)); return (type_ & (kStringRepresentationMask | kStringEncodingMask));
} }
......
...@@ -5038,55 +5038,38 @@ int String::Utf8Length() { ...@@ -5038,55 +5038,38 @@ int String::Utf8Length() {
} }
Vector<const char> String::ToAsciiVector() { String::FlatContent String::GetFlatContent(const AssertNoAllocation& promise) {
ASSERT(IsAsciiRepresentation()); // Argument isn't used, it's only there to ensure that the user is
ASSERT(IsFlat()); // aware that the extracted vectors may not survive a GC.
int offset = 0;
int length = this->length();
StringRepresentationTag string_tag = StringShape(this).representation_tag();
String* string = this;
if (string_tag == kConsStringTag) {
ConsString* cons = ConsString::cast(string);
ASSERT(cons->second()->length() == 0);
string = cons->first();
string_tag = StringShape(string).representation_tag();
}
if (string_tag == kSeqStringTag) {
SeqAsciiString* seq = SeqAsciiString::cast(string);
char* start = seq->GetChars();
return Vector<const char>(start + offset, length);
}
ASSERT(string_tag == kExternalStringTag);
ExternalAsciiString* ext = ExternalAsciiString::cast(string);
const char* start = ext->resource()->data();
return Vector<const char>(start + offset, length);
}
Vector<const uc16> String::ToUC16Vector() {
ASSERT(IsTwoByteRepresentation());
ASSERT(IsFlat());
int offset = 0;
int length = this->length(); int length = this->length();
StringRepresentationTag string_tag = StringShape(this).representation_tag(); StringShape shape(this);
String* string = this; String* string = this;
if (string_tag == kConsStringTag) { if (shape.representation_tag() == kConsStringTag) {
ConsString* cons = ConsString::cast(string); ConsString* cons = ConsString::cast(string);
ASSERT(cons->second()->length() == 0); if (cons->second()->length() != 0) {
return FlatContent();
}
string = cons->first(); string = cons->first();
string_tag = StringShape(string).representation_tag(); shape = StringShape(string);
} }
if (string_tag == kSeqStringTag) { if (shape.encoding_tag() == kAsciiStringTag) {
SeqTwoByteString* seq = SeqTwoByteString::cast(string); const char* start;
return Vector<const uc16>(seq->GetChars() + offset, length); if (shape.representation_tag() == kSeqStringTag) {
start = SeqAsciiString::cast(string)->GetChars();
} else {
start = ExternalAsciiString::cast(string)->resource()->data();
}
return FlatContent(Vector<const char>(start, length));
} else {
ASSERT(shape.encoding_tag() == kTwoByteStringTag);
const uc16* start;
if (shape.representation_tag() == kSeqStringTag) {
start = SeqTwoByteString::cast(string)->GetChars();
} else {
start = ExternalTwoByteString::cast(string)->resource()->data();
}
return FlatContent(Vector<const uc16>(start, length));
} }
ASSERT(string_tag == kExternalStringTag);
ExternalTwoByteString* ext = ExternalTwoByteString::cast(string);
const uc16* start =
reinterpret_cast<const uc16*>(ext->resource()->data());
return Vector<const uc16>(start + offset, length);
} }
...@@ -5536,11 +5519,14 @@ void FlatStringReader::PostGarbageCollection() { ...@@ -5536,11 +5519,14 @@ void FlatStringReader::PostGarbageCollection() {
if (str_ == NULL) return; if (str_ == NULL) return;
Handle<String> str(str_); Handle<String> str(str_);
ASSERT(str->IsFlat()); ASSERT(str->IsFlat());
is_ascii_ = str->IsAsciiRepresentation(); AssertNoAllocation no_alloc;
String::FlatContent content = str->GetFlatContent(no_alloc);
ASSERT(content.IsFlat());
is_ascii_ = content.IsAscii();
if (is_ascii_) { if (is_ascii_) {
start_ = str->ToAsciiVector().start(); start_ = content.ToAsciiVector().start();
} else { } else {
start_ = str->ToUC16Vector().start(); start_ = content.ToUC16Vector().start();
} }
} }
...@@ -5860,12 +5846,14 @@ template <typename IteratorA> ...@@ -5860,12 +5846,14 @@ template <typename IteratorA>
static inline bool CompareStringContentsPartial(Isolate* isolate, static inline bool CompareStringContentsPartial(Isolate* isolate,
IteratorA* ia, IteratorA* ia,
String* b) { String* b) {
if (b->IsFlat()) { AssertNoAllocation no_alloc;
if (b->IsAsciiRepresentation()) { String::FlatContent content = b->GetFlatContent(no_alloc);
VectorIterator<char> ib(b->ToAsciiVector()); if (content.IsFlat()) {
if (content.IsAscii()) {
VectorIterator<char> ib(content.ToAsciiVector());
return CompareStringContents(ia, &ib); return CompareStringContents(ia, &ib);
} else { } else {
VectorIterator<uc16> ib(b->ToUC16Vector()); VectorIterator<uc16> ib(content.ToUC16Vector());
return CompareStringContents(ia, &ib); return CompareStringContents(ia, &ib);
} }
} else { } else {
...@@ -5895,6 +5883,8 @@ bool String::SlowEquals(String* other) { ...@@ -5895,6 +5883,8 @@ bool String::SlowEquals(String* other) {
String* lhs = this->TryFlattenGetString(); String* lhs = this->TryFlattenGetString();
String* rhs = other->TryFlattenGetString(); String* rhs = other->TryFlattenGetString();
AssertNoAllocation no_alloc;
if (StringShape(lhs).IsSequentialAscii() && if (StringShape(lhs).IsSequentialAscii() &&
StringShape(rhs).IsSequentialAscii()) { StringShape(rhs).IsSequentialAscii()) {
const char* str1 = SeqAsciiString::cast(lhs)->GetChars(); const char* str1 = SeqAsciiString::cast(lhs)->GetChars();
...@@ -5904,16 +5894,18 @@ bool String::SlowEquals(String* other) { ...@@ -5904,16 +5894,18 @@ bool String::SlowEquals(String* other) {
} }
Isolate* isolate = GetIsolate(); Isolate* isolate = GetIsolate();
if (lhs->IsFlat()) { String::FlatContent lhs_content = lhs->GetFlatContent(no_alloc);
if (lhs->IsAsciiRepresentation()) { String::FlatContent rhs_content = rhs->GetFlatContent(no_alloc);
Vector<const char> vec1 = lhs->ToAsciiVector(); if (lhs_content.IsFlat()) {
if (rhs->IsFlat()) { if (lhs_content.IsAscii()) {
if (rhs->IsAsciiRepresentation()) { Vector<const char> vec1 = lhs_content.ToAsciiVector();
Vector<const char> vec2 = rhs->ToAsciiVector(); if (rhs_content.IsFlat()) {
if (rhs_content.IsAscii()) {
Vector<const char> vec2 = rhs_content.ToAsciiVector();
return CompareRawStringContents(vec1, vec2); return CompareRawStringContents(vec1, vec2);
} else { } else {
VectorIterator<char> buf1(vec1); VectorIterator<char> buf1(vec1);
VectorIterator<uc16> ib(rhs->ToUC16Vector()); VectorIterator<uc16> ib(rhs_content.ToUC16Vector());
return CompareStringContents(&buf1, &ib); return CompareStringContents(&buf1, &ib);
} }
} else { } else {
...@@ -5923,14 +5915,14 @@ bool String::SlowEquals(String* other) { ...@@ -5923,14 +5915,14 @@ bool String::SlowEquals(String* other) {
isolate->objects_string_compare_buffer_b()); isolate->objects_string_compare_buffer_b());
} }
} else { } else {
Vector<const uc16> vec1 = lhs->ToUC16Vector(); Vector<const uc16> vec1 = lhs_content.ToUC16Vector();
if (rhs->IsFlat()) { if (rhs_content.IsFlat()) {
if (rhs->IsAsciiRepresentation()) { if (rhs_content.IsAscii()) {
VectorIterator<uc16> buf1(vec1); VectorIterator<uc16> buf1(vec1);
VectorIterator<char> ib(rhs->ToAsciiVector()); VectorIterator<char> ib(rhs_content.ToAsciiVector());
return CompareStringContents(&buf1, &ib); return CompareStringContents(&buf1, &ib);
} else { } else {
Vector<const uc16> vec2(rhs->ToUC16Vector()); Vector<const uc16> vec2(rhs_content.ToUC16Vector());
return CompareRawStringContents(vec1, vec2); return CompareRawStringContents(vec1, vec2);
} }
} else { } else {
...@@ -5981,10 +5973,13 @@ bool String::IsEqualTo(Vector<const char> str) { ...@@ -5981,10 +5973,13 @@ bool String::IsEqualTo(Vector<const char> str) {
bool String::IsAsciiEqualTo(Vector<const char> str) { bool String::IsAsciiEqualTo(Vector<const char> str) {
AssertNoAllocation no_alloc;
int slen = length(); int slen = length();
if (str.length() != slen) return false; if (str.length() != slen) return false;
if (IsFlat() && IsAsciiRepresentation()) { FlatContent content = GetFlatContent(no_alloc);
return CompareChars(ToAsciiVector().start(), str.start(), slen) == 0; if (content.IsAscii()) {
return CompareChars(content.ToAsciiVector().start(),
str.start(), slen) == 0;
} }
for (int i = 0; i < slen; i++) { for (int i = 0; i < slen; i++) {
if (Get(i) != static_cast<uint16_t>(str[i])) return false; if (Get(i) != static_cast<uint16_t>(str[i])) return false;
...@@ -5994,10 +5989,12 @@ bool String::IsAsciiEqualTo(Vector<const char> str) { ...@@ -5994,10 +5989,12 @@ bool String::IsAsciiEqualTo(Vector<const char> str) {
bool String::IsTwoByteEqualTo(Vector<const uc16> str) { bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
AssertNoAllocation no_alloc;
int slen = length(); int slen = length();
if (str.length() != slen) return false; if (str.length() != slen) return false;
if (IsFlat() && IsTwoByteRepresentation()) { FlatContent content = GetFlatContent(no_alloc);
return CompareChars(ToUC16Vector().start(), str.start(), slen) == 0; if (content.IsTwoByte()) {
return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0;
} }
for (int i = 0; i < slen; i++) { for (int i = 0; i < slen; i++) {
if (Get(i) != str[i]) return false; if (Get(i) != str[i]) return false;
......
...@@ -5789,6 +5789,7 @@ class StringShape BASE_EMBEDDED { ...@@ -5789,6 +5789,7 @@ class StringShape BASE_EMBEDDED {
inline bool IsSequentialTwoByte(); inline bool IsSequentialTwoByte();
inline bool IsSymbol(); inline bool IsSymbol();
inline StringRepresentationTag representation_tag(); inline StringRepresentationTag representation_tag();
inline uint32_t encoding_tag();
inline uint32_t full_representation_tag(); inline uint32_t full_representation_tag();
inline uint32_t size_tag(); inline uint32_t size_tag();
#ifdef DEBUG #ifdef DEBUG
...@@ -5820,6 +5821,51 @@ class StringShape BASE_EMBEDDED { ...@@ -5820,6 +5821,51 @@ class StringShape BASE_EMBEDDED {
// All string values have a length field. // All string values have a length field.
class String: public HeapObject { class String: public HeapObject {
public: public:
// Representation of the flat content of a String.
// A non-flat string doesn't have flat content.
// A flat string has content that's encoded as a sequence of either
// ASCII chars or two-byte UC16.
// Returned by String::GetFlatContent().
//
// The structure is a tagged view: it stores one byte vector plus a state
// tag saying how that vector must be interpreted. All accessors are const,
// so the content can also be inspected through a const FlatContent.
class FlatContent {
 public:
  // Returns true if the string is flat and this structure contains content.
  bool IsFlat() const { return state_ != NON_FLAT; }
  // Returns true if the structure contains ASCII content.
  bool IsAscii() const { return state_ == ASCII; }
  // Returns true if the structure contains two-byte content.
  bool IsTwoByte() const { return state_ == TWO_BYTE; }

  // Return the ASCII content of the string. Only use if IsAscii() returns
  // true.
  Vector<const char> ToAsciiVector() const {
    ASSERT_EQ(ASCII, state_);
    return Vector<const char>::cast(buffer_);
  }
  // Return the two-byte content of the string. Only use if IsTwoByte()
  // returns true.
  Vector<const uc16> ToUC16Vector() const {
    ASSERT_EQ(TWO_BYTE, state_);
    return Vector<const uc16>::cast(buffer_);
  }

 private:
  // NON_FLAT: no content available; ASCII / TWO_BYTE: encoding of buffer_.
  enum State { NON_FLAT, ASCII, TWO_BYTE };

  // Constructors only used by String::GetFlatContent().
  // Wraps ASCII character data.
  explicit FlatContent(Vector<const char> chars)
      : buffer_(Vector<const byte>::cast(chars)),
        state_(ASCII) { }
  // Wraps two-byte character data.
  explicit FlatContent(Vector<const uc16> chars)
      : buffer_(Vector<const byte>::cast(chars)),
        state_(TWO_BYTE) { }
  // Creates the "no flat content" result with an empty buffer.
  FlatContent() : buffer_(), state_(NON_FLAT) { }

  // Character data stored as bytes; cast back to char or uc16 on access
  // according to state_.
  Vector<const byte> buffer_;
  State state_;

  friend class String;
};
// Get and set the length of the string. // Get and set the length of the string.
inline int length(); inline int length();
inline void set_length(int value); inline void set_length(int value);
...@@ -5831,10 +5877,10 @@ class String: public HeapObject { ...@@ -5831,10 +5877,10 @@ class String: public HeapObject {
inline bool IsAsciiRepresentation(); inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation(); inline bool IsTwoByteRepresentation();
// Returns whether this string has ascii chars, i.e. all of them can // Returns whether this string has only ASCII chars, i.e. all of them can
// be ascii encoded. This might be the case even if the string is // be ASCII encoded. This might be the case even if the string is
// two-byte. Such strings may appear when the embedder prefers // two-byte. Such strings may appear when the embedder prefers
// two-byte external representations even for ascii data. // two-byte external representations even for ASCII data.
// //
// NOTE: this should be considered only a hint. False negatives are // NOTE: this should be considered only a hint. False negatives are
// possible. // possible.
...@@ -5868,8 +5914,12 @@ class String: public HeapObject { ...@@ -5868,8 +5914,12 @@ class String: public HeapObject {
// string. // string.
inline String* TryFlattenGetString(PretenureFlag pretenure = NOT_TENURED); inline String* TryFlattenGetString(PretenureFlag pretenure = NOT_TENURED);
Vector<const char> ToAsciiVector(); // Tries to return the content of a flat string as a structure holding either
Vector<const uc16> ToUC16Vector(); // a flat vector of char or of uc16.
// If the string isn't flat, and therefore doesn't have flat content, the
// returned structure will report so, and can't provide a vector of either
// kind.
FlatContent GetFlatContent(const AssertNoAllocation& safety_promise);
// Mark the string as an undetectable object. It only applies to // Mark the string as an undetectable object. It only applies to
// ascii and two byte string types. // ascii and two byte string types.
......
...@@ -2663,21 +2663,22 @@ class CompiledReplacement { ...@@ -2663,21 +2663,22 @@ class CompiledReplacement {
void CompiledReplacement::Compile(Handle<String> replacement, void CompiledReplacement::Compile(Handle<String> replacement,
int capture_count, int capture_count,
int subject_length) { int subject_length) {
ASSERT(replacement->IsFlat()); {
if (replacement->IsAsciiRepresentation()) {
AssertNoAllocation no_alloc;
ParseReplacementPattern(&parts_,
replacement->ToAsciiVector(),
capture_count,
subject_length);
} else {
ASSERT(replacement->IsTwoByteRepresentation());
AssertNoAllocation no_alloc; AssertNoAllocation no_alloc;
String::FlatContent content = replacement->GetFlatContent(no_alloc);
ParseReplacementPattern(&parts_, ASSERT(content.IsFlat());
replacement->ToUC16Vector(), if (content.IsAscii()) {
capture_count, ParseReplacementPattern(&parts_,
subject_length); content.ToAsciiVector(),
capture_count,
subject_length);
} else {
ASSERT(content.IsTwoByte());
ParseReplacementPattern(&parts_,
content.ToUC16Vector(),
capture_count,
subject_length);
}
} }
Isolate* isolate = replacement->GetIsolate(); Isolate* isolate = replacement->GetIsolate();
// Find substrings of replacement string and create them as String objects. // Find substrings of replacement string and create them as String objects.
...@@ -3049,34 +3050,32 @@ int Runtime::StringMatch(Isolate* isolate, ...@@ -3049,34 +3050,32 @@ int Runtime::StringMatch(Isolate* isolate,
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining asciiness. // Extract flattened substrings of cons strings before determining asciiness.
String* seq_sub = *sub; String::FlatContent seq_sub = sub->GetFlatContent(no_heap_allocation);
if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); String::FlatContent seq_pat = pat->GetFlatContent(no_heap_allocation);
String* seq_pat = *pat;
if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first();
// dispatch on type of strings // dispatch on type of strings
if (seq_pat->IsAsciiRepresentation()) { if (seq_pat.IsAscii()) {
Vector<const char> pat_vector = seq_pat->ToAsciiVector(); Vector<const char> pat_vector = seq_pat.ToAsciiVector();
if (seq_sub->IsAsciiRepresentation()) { if (seq_sub.IsAscii()) {
return SearchString(isolate, return SearchString(isolate,
seq_sub->ToAsciiVector(), seq_sub.ToAsciiVector(),
pat_vector, pat_vector,
start_index); start_index);
} }
return SearchString(isolate, return SearchString(isolate,
seq_sub->ToUC16Vector(), seq_sub.ToUC16Vector(),
pat_vector, pat_vector,
start_index); start_index);
} }
Vector<const uc16> pat_vector = seq_pat->ToUC16Vector(); Vector<const uc16> pat_vector = seq_pat.ToUC16Vector();
if (seq_sub->IsAsciiRepresentation()) { if (seq_sub.IsAscii()) {
return SearchString(isolate, return SearchString(isolate,
seq_sub->ToAsciiVector(), seq_sub.ToAsciiVector(),
pat_vector, pat_vector,
start_index); start_index);
} }
return SearchString(isolate, return SearchString(isolate,
seq_sub->ToUC16Vector(), seq_sub.ToUC16Vector(),
pat_vector, pat_vector,
start_index); start_index);
} }
...@@ -3161,31 +3160,29 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringLastIndexOf) { ...@@ -3161,31 +3160,29 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringLastIndexOf) {
int position = -1; int position = -1;
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining asciiness.
String* seq_sub = *sub; String::FlatContent sub_content = sub->GetFlatContent(no_heap_allocation);
if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); String::FlatContent pat_content = pat->GetFlatContent(no_heap_allocation);
String* seq_pat = *pat;
if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first(); if (pat_content.IsAscii()) {
Vector<const char> pat_vector = pat_content.ToAsciiVector();
if (seq_pat->IsAsciiRepresentation()) { if (sub_content.IsAscii()) {
Vector<const char> pat_vector = seq_pat->ToAsciiVector(); position = StringMatchBackwards(sub_content.ToAsciiVector(),
if (seq_sub->IsAsciiRepresentation()) {
position = StringMatchBackwards(seq_sub->ToAsciiVector(),
pat_vector, pat_vector,
start_index); start_index);
} else { } else {
position = StringMatchBackwards(seq_sub->ToUC16Vector(), position = StringMatchBackwards(sub_content.ToUC16Vector(),
pat_vector, pat_vector,
start_index); start_index);
} }
} else { } else {
Vector<const uc16> pat_vector = seq_pat->ToUC16Vector(); Vector<const uc16> pat_vector = pat_content.ToUC16Vector();
if (seq_sub->IsAsciiRepresentation()) { if (sub_content.IsAscii()) {
position = StringMatchBackwards(seq_sub->ToAsciiVector(), position = StringMatchBackwards(sub_content.ToAsciiVector(),
pat_vector, pat_vector,
start_index); start_index);
} else { } else {
position = StringMatchBackwards(seq_sub->ToUC16Vector(), position = StringMatchBackwards(sub_content.ToUC16Vector(),
pat_vector, pat_vector,
start_index); start_index);
} }
...@@ -3403,36 +3400,38 @@ static bool SearchStringMultiple(Isolate* isolate, ...@@ -3403,36 +3400,38 @@ static bool SearchStringMultiple(Isolate* isolate,
for (;;) { // Break when search complete. for (;;) { // Break when search complete.
builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
AssertNoAllocation no_gc; AssertNoAllocation no_gc;
if (subject->IsAsciiRepresentation()) { String::FlatContent subject_content = subject->GetFlatContent(no_gc);
Vector<const char> subject_vector = subject->ToAsciiVector(); String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
if (pattern->IsAsciiRepresentation()) { if (subject_content.IsAscii()) {
Vector<const char> subject_vector = subject_content.ToAsciiVector();
if (pattern_content.IsAscii()) {
if (SearchStringMultiple(isolate, if (SearchStringMultiple(isolate,
subject_vector, subject_vector,
pattern->ToAsciiVector(), pattern_content.ToAsciiVector(),
*pattern, *pattern,
builder, builder,
&match_pos)) break; &match_pos)) break;
} else { } else {
if (SearchStringMultiple(isolate, if (SearchStringMultiple(isolate,
subject_vector, subject_vector,
pattern->ToUC16Vector(), pattern_content.ToUC16Vector(),
*pattern, *pattern,
builder, builder,
&match_pos)) break; &match_pos)) break;
} }
} else { } else {
Vector<const uc16> subject_vector = subject->ToUC16Vector(); Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (pattern->IsAsciiRepresentation()) { if (pattern_content.IsAscii()) {
if (SearchStringMultiple(isolate, if (SearchStringMultiple(isolate,
subject_vector, subject_vector,
pattern->ToAsciiVector(), pattern_content.ToAsciiVector(),
*pattern, *pattern,
builder, builder,
&match_pos)) break; &match_pos)) break;
} else { } else {
if (SearchStringMultiple(isolate, if (SearchStringMultiple(isolate,
subject_vector, subject_vector,
pattern->ToUC16Vector(), pattern_content.ToUC16Vector(),
*pattern, *pattern,
builder, builder,
&match_pos)) break; &match_pos)) break;
...@@ -5420,12 +5419,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONString) { ...@@ -5420,12 +5419,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONString) {
str = String::cast(flat); str = String::cast(flat);
ASSERT(str->IsFlat()); ASSERT(str->IsFlat());
} }
if (str->IsTwoByteRepresentation()) { AssertNoAllocation no_alloc;
String::FlatContent flat = str->GetFlatContent(no_alloc);
ASSERT(flat.IsFlat());
if (flat.IsTwoByte()) {
return QuoteJsonString<uc16, SeqTwoByteString, false>(isolate, return QuoteJsonString<uc16, SeqTwoByteString, false>(isolate,
str->ToUC16Vector()); flat.ToUC16Vector());
} else { } else {
return QuoteJsonString<char, SeqAsciiString, false>(isolate, return QuoteJsonString<char, SeqAsciiString, false>(isolate,
str->ToAsciiVector()); flat.ToAsciiVector());
} }
} }
...@@ -5442,12 +5444,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONStringComma) { ...@@ -5442,12 +5444,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONStringComma) {
str = String::cast(flat); str = String::cast(flat);
ASSERT(str->IsFlat()); ASSERT(str->IsFlat());
} }
if (str->IsTwoByteRepresentation()) { AssertNoAllocation no_alloc;
String::FlatContent flat = str->GetFlatContent(no_alloc);
if (flat.IsTwoByte()) {
return QuoteJsonString<uc16, SeqTwoByteString, true>(isolate, return QuoteJsonString<uc16, SeqTwoByteString, true>(isolate,
str->ToUC16Vector()); flat.ToUC16Vector());
} else { } else {
return QuoteJsonString<char, SeqAsciiString, true>(isolate, return QuoteJsonString<char, SeqAsciiString, true>(isolate,
str->ToAsciiVector()); flat.ToAsciiVector());
} }
} }
...@@ -5482,14 +5486,16 @@ static MaybeObject* QuoteJsonStringArray(Isolate* isolate, ...@@ -5482,14 +5486,16 @@ static MaybeObject* QuoteJsonStringArray(Isolate* isolate,
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
if (i != 0) *(write_cursor++) = ','; if (i != 0) *(write_cursor++) = ',';
String* str = String::cast(array->get(i)); String* str = String::cast(array->get(i));
if (str->IsTwoByteRepresentation()) { String::FlatContent content = str->GetFlatContent(no_gc);
ASSERT(content.IsFlat());
if (content.IsTwoByte()) {
write_cursor = WriteQuoteJsonString<Char, uc16>(isolate, write_cursor = WriteQuoteJsonString<Char, uc16>(isolate,
write_cursor, write_cursor,
str->ToUC16Vector()); content.ToUC16Vector());
} else { } else {
write_cursor = WriteQuoteJsonString<Char, char>(isolate, write_cursor = WriteQuoteJsonString<Char, char>(isolate,
write_cursor, write_cursor,
str->ToAsciiVector()); content.ToAsciiVector());
} }
} }
*(write_cursor++) = ']'; *(write_cursor++) = ']';
...@@ -5968,11 +5974,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { ...@@ -5968,11 +5974,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
// No allocation block. // No allocation block.
{ {
AssertNoAllocation nogc; AssertNoAllocation no_gc;
if (subject->IsAsciiRepresentation()) { String::FlatContent subject_content = subject->GetFlatContent(no_gc);
Vector<const char> subject_vector = subject->ToAsciiVector(); String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
if (pattern->IsAsciiRepresentation()) { ASSERT(subject_content.IsFlat());
Vector<const char> pattern_vector = pattern->ToAsciiVector(); ASSERT(pattern_content.IsFlat());
if (subject_content.IsAscii()) {
Vector<const char> subject_vector = subject_content.ToAsciiVector();
if (pattern_content.IsAscii()) {
Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
if (pattern_vector.length() == 1) { if (pattern_vector.length() == 1) {
FindAsciiStringIndices(subject_vector, FindAsciiStringIndices(subject_vector,
pattern_vector[0], pattern_vector[0],
...@@ -5988,22 +5998,22 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { ...@@ -5988,22 +5998,22 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
} else { } else {
FindStringIndices(isolate, FindStringIndices(isolate,
subject_vector, subject_vector,
pattern->ToUC16Vector(), pattern_content.ToUC16Vector(),
&indices, &indices,
limit); limit);
} }
} else { } else {
Vector<const uc16> subject_vector = subject->ToUC16Vector(); Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (pattern->IsAsciiRepresentation()) { if (pattern->IsAsciiRepresentation()) {
FindStringIndices(isolate, FindStringIndices(isolate,
subject_vector, subject_vector,
pattern->ToAsciiVector(), pattern_content.ToAsciiVector(),
&indices, &indices,
limit); limit);
} else { } else {
FindStringIndices(isolate, FindStringIndices(isolate,
subject_vector, subject_vector,
pattern->ToUC16Vector(), pattern_content.ToUC16Vector(),
&indices, &indices,
limit); limit);
} }
...@@ -6085,36 +6095,40 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToArray) { ...@@ -6085,36 +6095,40 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToArray) {
CONVERT_ARG_CHECKED(String, s, 0); CONVERT_ARG_CHECKED(String, s, 0);
CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[1]); CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[1]);
s->TryFlatten(); s = FlattenGetString(s);
const int length = static_cast<int>(Min<uint32_t>(s->length(), limit)); const int length = static_cast<int>(Min<uint32_t>(s->length(), limit));
Handle<FixedArray> elements; Handle<FixedArray> elements;
int position = 0;
if (s->IsFlat() && s->IsAsciiRepresentation()) { if (s->IsFlat() && s->IsAsciiRepresentation()) {
// Try using cached chars where possible.
Object* obj; Object* obj;
{ MaybeObject* maybe_obj = { MaybeObject* maybe_obj =
isolate->heap()->AllocateUninitializedFixedArray(length); isolate->heap()->AllocateUninitializedFixedArray(length);
if (!maybe_obj->ToObject(&obj)) return maybe_obj; if (!maybe_obj->ToObject(&obj)) return maybe_obj;
} }
AssertNoAllocation no_alloc;
elements = Handle<FixedArray>(FixedArray::cast(obj), isolate); elements = Handle<FixedArray>(FixedArray::cast(obj), isolate);
String::FlatContent content = s->GetFlatContent(no_alloc);
Vector<const char> chars = s->ToAsciiVector(); if (content.IsAscii()) {
// Note, this will initialize all elements (not only the prefix) Vector<const char> chars = content.ToAsciiVector();
// to prevent GC from seeing partially initialized array. // Note, this will initialize all elements (not only the prefix)
int num_copied_from_cache = CopyCachedAsciiCharsToArray(isolate->heap(), // to prevent GC from seeing partially initialized array.
chars.start(), position = CopyCachedAsciiCharsToArray(isolate->heap(),
*elements, chars.start(),
length); *elements,
length);
for (int i = num_copied_from_cache; i < length; ++i) { } else {
Handle<Object> str = LookupSingleCharacterStringFromCode(chars[i]); MemsetPointer(elements->data_start(),
elements->set(i, *str); isolate->heap()->undefined_value(),
length);
} }
} else { } else {
elements = isolate->factory()->NewFixedArray(length); elements = isolate->factory()->NewFixedArray(length);
for (int i = 0; i < length; ++i) { }
Handle<Object> str = LookupSingleCharacterStringFromCode(s->Get(i)); for (int i = position; i < length; ++i) {
elements->set(i, *str); Handle<Object> str = LookupSingleCharacterStringFromCode(s->Get(i));
} elements->set(i, *str);
} }
#ifdef DEBUG #ifdef DEBUG
...@@ -6916,6 +6930,7 @@ static Object* StringInputBufferCompare(RuntimeState* state, ...@@ -6916,6 +6930,7 @@ static Object* StringInputBufferCompare(RuntimeState* state,
static Object* FlatStringCompare(String* x, String* y) { static Object* FlatStringCompare(String* x, String* y) {
ASSERT(x->IsFlat()); ASSERT(x->IsFlat());
ASSERT(y->IsFlat()); ASSERT(y->IsFlat());
AssertNoAllocation no_alloc;
Object* equal_prefix_result = Smi::FromInt(EQUAL); Object* equal_prefix_result = Smi::FromInt(EQUAL);
int prefix_length = x->length(); int prefix_length = x->length();
if (y->length() < prefix_length) { if (y->length() < prefix_length) {
...@@ -6925,22 +6940,24 @@ static Object* FlatStringCompare(String* x, String* y) { ...@@ -6925,22 +6940,24 @@ static Object* FlatStringCompare(String* x, String* y) {
equal_prefix_result = Smi::FromInt(LESS); equal_prefix_result = Smi::FromInt(LESS);
} }
int r; int r;
if (x->IsAsciiRepresentation()) { String::FlatContent x_content = x->GetFlatContent(no_alloc);
Vector<const char> x_chars = x->ToAsciiVector(); String::FlatContent y_content = y->GetFlatContent(no_alloc);
if (y->IsAsciiRepresentation()) { if (x_content.IsAscii()) {
Vector<const char> y_chars = y->ToAsciiVector(); Vector<const char> x_chars = x_content.ToAsciiVector();
if (y_content.IsAscii()) {
Vector<const char> y_chars = y_content.ToAsciiVector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
} else { } else {
Vector<const uc16> y_chars = y->ToUC16Vector(); Vector<const uc16> y_chars = y_content.ToUC16Vector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
} }
} else { } else {
Vector<const uc16> x_chars = x->ToUC16Vector(); Vector<const uc16> x_chars = x_content.ToUC16Vector();
if (y->IsAsciiRepresentation()) { if (y_content.IsAscii()) {
Vector<const char> y_chars = y->ToAsciiVector(); Vector<const char> y_chars = y_content.ToAsciiVector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
} else { } else {
Vector<const uc16> y_chars = y->ToUC16Vector(); Vector<const uc16> y_chars = y_content.ToUC16Vector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
} }
} }
...@@ -8821,13 +8838,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_DateParseString) { ...@@ -8821,13 +8838,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_DateParseString) {
FixedArray* output_array = FixedArray::cast(output->elements()); FixedArray* output_array = FixedArray::cast(output->elements());
RUNTIME_ASSERT(output_array->length() >= DateParser::OUTPUT_SIZE); RUNTIME_ASSERT(output_array->length() >= DateParser::OUTPUT_SIZE);
bool result; bool result;
if (str->IsAsciiRepresentation()) { String::FlatContent str_content = str->GetFlatContent(no_allocation);
result = DateParser::Parse(str->ToAsciiVector(), if (str_content.IsAscii()) {
result = DateParser::Parse(str_content.ToAsciiVector(),
output_array, output_array,
isolate->unicode_cache()); isolate->unicode_cache());
} else { } else {
ASSERT(str->IsTwoByteRepresentation()); ASSERT(str_content.IsTwoByte());
result = DateParser::Parse(str->ToUC16Vector(), result = DateParser::Parse(str_content.ToUC16Vector(),
output_array, output_array,
isolate->unicode_cache()); isolate->unicode_cache());
} }
...@@ -12805,9 +12823,12 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_ListNatives) { ...@@ -12805,9 +12823,12 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_ListNatives) {
RUNTIME_FUNCTION(MaybeObject*, Runtime_Log) { RUNTIME_FUNCTION(MaybeObject*, Runtime_Log) {
ASSERT(args.length() == 2); ASSERT(args.length() == 2);
AssertNoAllocation no_alloc;
CONVERT_CHECKED(String, format, args[0]); CONVERT_CHECKED(String, format, args[0]);
CONVERT_CHECKED(JSArray, elms, args[1]); CONVERT_CHECKED(JSArray, elms, args[1]);
Vector<const char> chars = format->ToAsciiVector(); String::FlatContent format_content = format->GetFlatContent(no_alloc);
RUNTIME_ASSERT(format_content.IsAscii());
Vector<const char> chars = format_content.ToAsciiVector();
LOGGER->LogRuntime(chars, elms); LOGGER->LogRuntime(chars, elms);
return isolate->heap()->undefined_value(); return isolate->heap()->undefined_value();
} }
......
...@@ -116,3 +116,14 @@ assertEquals(["a", "b", "c"], "abc".split("", 3)); ...@@ -116,3 +116,14 @@ assertEquals(["a", "b", "c"], "abc".split("", 3));
assertEquals(["a", "b", "c"], "abc".split("", numberObj(3))); assertEquals(["a", "b", "c"], "abc".split("", numberObj(3)));
assertEquals(["a", "b", "c"], "abc".split("", 4)); assertEquals(["a", "b", "c"], "abc".split("", 4));
assertEquals(["a", "b", "c"], "abc".split("", numberObj(4))); assertEquals(["a", "b", "c"], "abc".split("", numberObj(4)));
var all_ascii_chars = [];
for (var i = 0; i < 128; i++) all_ascii_chars[i] = String.fromCharCode(i);
var all_ascii_string = all_ascii_chars.join("");
var split_chars = all_ascii_string.split("");
assertEquals(128, split_chars.length);
for (var i = 0; i < 128; i++) {
assertEquals(1, split_chars[i].length);
assertEquals(i, split_chars[i].charCodeAt(0));
}
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment