Commit 9eb7d4a5 authored by lrn@chromium.org

Replace ToAsciiVector and ToUC16Vector with a single function that returns a tagged value.

The tag tells whether the content is ASCII or UC16, or that the string wasn't flat at all.

BUG: v8:1633

Review URL: http://codereview.chromium.org/7709024

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8999 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent bf4222fb
......@@ -617,15 +617,17 @@ Handle<FixedArray> CalculateLineEnds(Handle<String> src,
{
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid.
// Dispatch on type of strings.
if (src->IsAsciiRepresentation()) {
String::FlatContent content = src->GetFlatContent(no_heap_allocation);
ASSERT(content.IsFlat());
if (content.IsAscii()) {
CalculateLineEnds(isolate,
&line_ends,
src->ToAsciiVector(),
content.ToAsciiVector(),
with_last_line);
} else {
CalculateLineEnds(isolate,
&line_ends,
src->ToUC16Vector(),
content.ToUC16Vector(),
with_last_line);
}
}
......
......@@ -778,7 +778,8 @@ void HHasInstanceTypeAndBranch::PrintDataTo(StringStream* stream) {
void HTypeofIsAndBranch::PrintDataTo(StringStream* stream) {
value()->PrintNameTo(stream);
stream->Add(" == ");
stream->Add(type_literal_->ToAsciiVector());
AssertNoAllocation no_alloc;
stream->Add(type_literal_->GetFlatContent(no_alloc).ToAsciiVector());
}
......
// Copyright 2008 the V8 project authors. All rights reserved.
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
......@@ -635,8 +635,9 @@ bool IrregexpInterpreter::Match(Isolate* isolate,
AssertNoAllocation a;
const byte* code_base = code_array->GetDataStartAddress();
uc16 previous_char = '\n';
if (subject->IsAsciiRepresentation()) {
Vector<const char> subject_vector = subject->ToAsciiVector();
String::FlatContent subject_content = subject->GetFlatContent(a);
if (subject_content.IsAscii()) {
Vector<const char> subject_vector = subject_content.ToAsciiVector();
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate,
code_base,
......@@ -645,7 +646,8 @@ bool IrregexpInterpreter::Match(Isolate* isolate,
start_position,
previous_char);
} else {
Vector<const uc16> subject_vector = subject->ToUC16Vector();
ASSERT(subject_content.IsTwoByte());
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate,
code_base,
......
......@@ -212,19 +212,7 @@ static void SetAtomLastCapture(FixedArray* array,
RegExpImpl::SetCapture(array, 1, to);
}
/* template <typename SubjectChar>, typename PatternChar>
static int ReStringMatch(Vector<const SubjectChar> sub_vector,
Vector<const PatternChar> pat_vector,
int start_index) {
int pattern_length = pat_vector.length();
if (pattern_length == 0) return start_index;
int subject_length = sub_vector.length();
if (start_index + pattern_length > subject_length) return -1;
return SearchString(sub_vector, pat_vector, start_index);
}
*/
Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
Handle<String> subject,
int index,
......@@ -237,35 +225,41 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
if (!subject->IsFlat()) FlattenString(subject);
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining asciiness.
String* seq_sub = *subject;
if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
int needle_len = needle->length();
ASSERT(needle->IsFlat());
if (needle_len != 0) {
if (index + needle_len > subject->length())
return isolate->factory()->null_value();
if (index + needle_len > subject->length()) {
return isolate->factory()->null_value();
}
String::FlatContent needle_content =
needle->GetFlatContent(no_heap_allocation);
String::FlatContent subject_content =
subject->GetFlatContent(no_heap_allocation);
ASSERT(needle_content.IsFlat());
ASSERT(subject_content.IsFlat());
// dispatch on type of strings
index = (needle->IsAsciiRepresentation()
? (seq_sub->IsAsciiRepresentation()
index = (needle_content.IsAscii()
? (subject_content.IsAscii()
? SearchString(isolate,
seq_sub->ToAsciiVector(),
needle->ToAsciiVector(),
subject_content.ToAsciiVector(),
needle_content.ToAsciiVector(),
index)
: SearchString(isolate,
seq_sub->ToUC16Vector(),
needle->ToAsciiVector(),
subject_content.ToUC16Vector(),
needle_content.ToAsciiVector(),
index))
: (seq_sub->IsAsciiRepresentation()
: (subject_content.IsAscii()
? SearchString(isolate,
seq_sub->ToAsciiVector(),
needle->ToUC16Vector(),
subject_content.ToAsciiVector(),
needle_content.ToUC16Vector(),
index)
: SearchString(isolate,
seq_sub->ToUC16Vector(),
needle->ToUC16Vector(),
subject_content.ToUC16Vector(),
needle_content.ToUC16Vector(),
index)));
if (index == -1) return isolate->factory()->null_value();
}
......
......@@ -297,6 +297,11 @@ StringRepresentationTag StringShape::representation_tag() {
}
// Returns the bits of type_ selected by kStringEncodingMask.
uint32_t StringShape::encoding_tag() {
  const uint32_t encoding_bits = type_ & kStringEncodingMask;
  return encoding_bits;
}
// Returns the bits of type_ selected by the union of
// kStringRepresentationMask and kStringEncodingMask.
uint32_t StringShape::full_representation_tag() {
  const uint32_t tag =
      type_ & (kStringRepresentationMask | kStringEncodingMask);
  return tag;
}
......
......@@ -5038,55 +5038,38 @@ int String::Utf8Length() {
}
Vector<const char> String::ToAsciiVector() {
ASSERT(IsAsciiRepresentation());
ASSERT(IsFlat());
int offset = 0;
int length = this->length();
StringRepresentationTag string_tag = StringShape(this).representation_tag();
String* string = this;
if (string_tag == kConsStringTag) {
ConsString* cons = ConsString::cast(string);
ASSERT(cons->second()->length() == 0);
string = cons->first();
string_tag = StringShape(string).representation_tag();
}
if (string_tag == kSeqStringTag) {
SeqAsciiString* seq = SeqAsciiString::cast(string);
char* start = seq->GetChars();
return Vector<const char>(start + offset, length);
}
ASSERT(string_tag == kExternalStringTag);
ExternalAsciiString* ext = ExternalAsciiString::cast(string);
const char* start = ext->resource()->data();
return Vector<const char>(start + offset, length);
}
Vector<const uc16> String::ToUC16Vector() {
ASSERT(IsTwoByteRepresentation());
ASSERT(IsFlat());
int offset = 0;
String::FlatContent String::GetFlatContent(const AssertNoAllocation& promise) {
// Argument isn't used, it's only there to ensure that the user is
// aware that the extracted vectors may not survive a GC.
int length = this->length();
StringRepresentationTag string_tag = StringShape(this).representation_tag();
StringShape shape(this);
String* string = this;
if (string_tag == kConsStringTag) {
if (shape.representation_tag() == kConsStringTag) {
ConsString* cons = ConsString::cast(string);
ASSERT(cons->second()->length() == 0);
if (cons->second()->length() != 0) {
return FlatContent();
}
string = cons->first();
string_tag = StringShape(string).representation_tag();
shape = StringShape(string);
}
if (string_tag == kSeqStringTag) {
SeqTwoByteString* seq = SeqTwoByteString::cast(string);
return Vector<const uc16>(seq->GetChars() + offset, length);
if (shape.encoding_tag() == kAsciiStringTag) {
const char* start;
if (shape.representation_tag() == kSeqStringTag) {
start = SeqAsciiString::cast(string)->GetChars();
} else {
start = ExternalAsciiString::cast(string)->resource()->data();
}
return FlatContent(Vector<const char>(start, length));
} else {
ASSERT(shape.encoding_tag() == kTwoByteStringTag);
const uc16* start;
if (shape.representation_tag() == kSeqStringTag) {
start = SeqTwoByteString::cast(string)->GetChars();
} else {
start = ExternalTwoByteString::cast(string)->resource()->data();
}
return FlatContent(Vector<const uc16>(start, length));
}
ASSERT(string_tag == kExternalStringTag);
ExternalTwoByteString* ext = ExternalTwoByteString::cast(string);
const uc16* start =
reinterpret_cast<const uc16*>(ext->resource()->data());
return Vector<const uc16>(start + offset, length);
}
......@@ -5536,11 +5519,14 @@ void FlatStringReader::PostGarbageCollection() {
// Refreshes is_ascii_ and start_ from the flat content of str_.
// Does nothing when str_ is NULL.
if (str_ == NULL) return;
Handle<String> str(str_);
ASSERT(str->IsFlat());
is_ascii_ = str->IsAsciiRepresentation();
AssertNoAllocation no_alloc;
String::FlatContent content = str->GetFlatContent(no_alloc);
// String::FlatContent names its flatness predicate IsFlat(); it declares no
// is_flat() member, so the assert must use IsFlat() to compile.
ASSERT(content.IsFlat());
is_ascii_ = content.IsAscii();
if (is_ascii_) {
start_ = str->ToAsciiVector().start();
start_ = content.ToAsciiVector().start();
} else {
start_ = str->ToUC16Vector().start();
start_ = content.ToUC16Vector().start();
}
}
......@@ -5860,12 +5846,14 @@ template <typename IteratorA>
static inline bool CompareStringContentsPartial(Isolate* isolate,
IteratorA* ia,
String* b) {
if (b->IsFlat()) {
if (b->IsAsciiRepresentation()) {
VectorIterator<char> ib(b->ToAsciiVector());
AssertNoAllocation no_alloc;
String::FlatContent content = b->GetFlatContent(no_alloc);
if (content.IsFlat()) {
if (content.IsAscii()) {
VectorIterator<char> ib(content.ToAsciiVector());
return CompareStringContents(ia, &ib);
} else {
VectorIterator<uc16> ib(b->ToUC16Vector());
VectorIterator<uc16> ib(content.ToUC16Vector());
return CompareStringContents(ia, &ib);
}
} else {
......@@ -5895,6 +5883,8 @@ bool String::SlowEquals(String* other) {
String* lhs = this->TryFlattenGetString();
String* rhs = other->TryFlattenGetString();
AssertNoAllocation no_alloc;
if (StringShape(lhs).IsSequentialAscii() &&
StringShape(rhs).IsSequentialAscii()) {
const char* str1 = SeqAsciiString::cast(lhs)->GetChars();
......@@ -5904,16 +5894,18 @@ bool String::SlowEquals(String* other) {
}
Isolate* isolate = GetIsolate();
if (lhs->IsFlat()) {
if (lhs->IsAsciiRepresentation()) {
Vector<const char> vec1 = lhs->ToAsciiVector();
if (rhs->IsFlat()) {
if (rhs->IsAsciiRepresentation()) {
Vector<const char> vec2 = rhs->ToAsciiVector();
String::FlatContent lhs_content = lhs->GetFlatContent(no_alloc);
String::FlatContent rhs_content = rhs->GetFlatContent(no_alloc);
if (lhs_content.IsFlat()) {
if (lhs_content.IsAscii()) {
Vector<const char> vec1 = lhs_content.ToAsciiVector();
if (rhs_content.IsFlat()) {
if (rhs_content.IsAscii()) {
Vector<const char> vec2 = rhs_content.ToAsciiVector();
return CompareRawStringContents(vec1, vec2);
} else {
VectorIterator<char> buf1(vec1);
VectorIterator<uc16> ib(rhs->ToUC16Vector());
VectorIterator<uc16> ib(rhs_content.ToUC16Vector());
return CompareStringContents(&buf1, &ib);
}
} else {
......@@ -5923,14 +5915,14 @@ bool String::SlowEquals(String* other) {
isolate->objects_string_compare_buffer_b());
}
} else {
Vector<const uc16> vec1 = lhs->ToUC16Vector();
if (rhs->IsFlat()) {
if (rhs->IsAsciiRepresentation()) {
Vector<const uc16> vec1 = lhs_content.ToUC16Vector();
if (rhs_content.IsFlat()) {
if (rhs_content.IsAscii()) {
VectorIterator<uc16> buf1(vec1);
VectorIterator<char> ib(rhs->ToAsciiVector());
VectorIterator<char> ib(rhs_content.ToAsciiVector());
return CompareStringContents(&buf1, &ib);
} else {
Vector<const uc16> vec2(rhs->ToUC16Vector());
Vector<const uc16> vec2(rhs_content.ToUC16Vector());
return CompareRawStringContents(vec1, vec2);
}
} else {
......@@ -5981,10 +5973,13 @@ bool String::IsEqualTo(Vector<const char> str) {
bool String::IsAsciiEqualTo(Vector<const char> str) {
AssertNoAllocation no_alloc;
int slen = length();
if (str.length() != slen) return false;
if (IsFlat() && IsAsciiRepresentation()) {
return CompareChars(ToAsciiVector().start(), str.start(), slen) == 0;
FlatContent content = GetFlatContent(no_alloc);
if (content.IsAscii()) {
return CompareChars(content.ToAsciiVector().start(),
str.start(), slen) == 0;
}
for (int i = 0; i < slen; i++) {
if (Get(i) != static_cast<uint16_t>(str[i])) return false;
......@@ -5994,10 +5989,12 @@ bool String::IsAsciiEqualTo(Vector<const char> str) {
bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
AssertNoAllocation no_alloc;
int slen = length();
if (str.length() != slen) return false;
if (IsFlat() && IsTwoByteRepresentation()) {
return CompareChars(ToUC16Vector().start(), str.start(), slen) == 0;
FlatContent content = GetFlatContent(no_alloc);
if (content.IsTwoByte()) {
return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0;
}
for (int i = 0; i < slen; i++) {
if (Get(i) != str[i]) return false;
......
......@@ -5789,6 +5789,7 @@ class StringShape BASE_EMBEDDED {
inline bool IsSequentialTwoByte();
inline bool IsSymbol();
inline StringRepresentationTag representation_tag();
inline uint32_t encoding_tag();
inline uint32_t full_representation_tag();
inline uint32_t size_tag();
#ifdef DEBUG
......@@ -5820,6 +5821,51 @@ class StringShape BASE_EMBEDDED {
// All string values have a length field.
class String: public HeapObject {
public:
// Representation of the flat content of a String.
// A non-flat string doesn't have flat content.
// A flat string has content that's encoded as a sequence of either
// ASCII chars or two-byte UC16.
// Returned by String::GetFlatContent().
//
// All accessors are const: they only read the stored buffer and state, so
// they can be used through const objects and const references as well.
class FlatContent {
 public:
  // Returns true if the string is flat and this structure contains content.
  bool IsFlat() const { return state_ != NON_FLAT; }
  // Returns true if the structure contains ASCII content.
  bool IsAscii() const { return state_ == ASCII; }
  // Returns true if the structure contains two-byte content.
  bool IsTwoByte() const { return state_ == TWO_BYTE; }

  // Return the ASCII content of the string. Only use if IsAscii() returns
  // true.
  Vector<const char> ToAsciiVector() const {
    ASSERT_EQ(ASCII, state_);
    return Vector<const char>::cast(buffer_);
  }
  // Return the two-byte content of the string. Only use if IsTwoByte()
  // returns true.
  Vector<const uc16> ToUC16Vector() const {
    ASSERT_EQ(TWO_BYTE, state_);
    return Vector<const uc16>::cast(buffer_);
  }

 private:
  // NON_FLAT: no content is held; ASCII / TWO_BYTE: buffer_ holds content of
  // that encoding.
  enum State { NON_FLAT, ASCII, TWO_BYTE };

  // Constructors only used by String::GetFlatContent().
  explicit FlatContent(Vector<const char> chars)
      : buffer_(Vector<const byte>::cast(chars)),
        state_(ASCII) { }
  explicit FlatContent(Vector<const uc16> chars)
      : buffer_(Vector<const byte>::cast(chars)),
        state_(TWO_BYTE) { }
  FlatContent() : buffer_(), state_(NON_FLAT) { }

  // The content stored as a byte vector; cast back to char or uc16 by the
  // To*Vector() accessors according to state_.
  Vector<const byte> buffer_;
  State state_;

  friend class String;
};
// Get and set the length of the string.
inline int length();
inline void set_length(int value);
......@@ -5831,10 +5877,10 @@ class String: public HeapObject {
inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation();
// Returns whether this string has ascii chars, i.e. all of them can
// be ascii encoded. This might be the case even if the string is
// Returns whether this string has only ASCII chars, i.e. all of them can
// be ASCII encoded. This might be the case even if the string is
// two-byte. Such strings may appear when the embedder prefers
// two-byte external representations even for ascii data.
// two-byte external representations even for ASCII data.
//
// NOTE: this should be considered only a hint. False negatives are
// possible.
......@@ -5868,8 +5914,12 @@ class String: public HeapObject {
// string.
inline String* TryFlattenGetString(PretenureFlag pretenure = NOT_TENURED);
Vector<const char> ToAsciiVector();
Vector<const uc16> ToUC16Vector();
// Tries to return the content of a flat string as a structure holding either
// a flat vector of char or of uc16.
// If the string isn't flat, and therefore doesn't have flat content, the
// returned structure will report so, and can't provide a vector of either
// kind.
FlatContent GetFlatContent(const AssertNoAllocation& safety_promise);
// Mark the string as an undetectable object. It only applies to
// ascii and two byte string types.
......
This diff is collapsed.
......@@ -116,3 +116,14 @@ assertEquals(["a", "b", "c"], "abc".split("", 3));
assertEquals(["a", "b", "c"], "abc".split("", numberObj(3)));
assertEquals(["a", "b", "c"], "abc".split("", 4));
assertEquals(["a", "b", "c"], "abc".split("", numberObj(4)));
// Build a string holding every character code from 0 to 127 exactly once,
// in ascending order.
var all_ascii_chars = [];
for (var i = 0; i < 128; i++) all_ascii_chars[i] = String.fromCharCode(i);
var all_ascii_string = all_ascii_chars.join("");

// Splitting on the empty string must yield one single-character string per
// input character, in the original order.
var split_chars = all_ascii_string.split("");
assertEquals(128, split_chars.length);
for (var i = 0; i < 128; i++) {
assertEquals(1, split_chars[i].length);
// Piece i must be exactly the character with code i.
assertEquals(i, split_chars[i].charCodeAt(0));
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment