// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_STRING_BUILDER_INL_H_
#define V8_STRING_BUILDER_INL_H_

#include "src/assert-scope.h"
#include "src/handles-inl.h"
#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/objects.h"
#include "src/objects/fixed-array.h"
#include "src/objects/string-inl.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

// Bit widths of the two fields that are packed into a single int by the
// aliases below. Presumably BitField takes <type, shift, size>, which would
// put the length in the low 11 bits and the position in the 19 bits above
// it -- confirm against the BitField definition.
constexpr int kStringBuilderConcatHelperLengthBits = 11;
constexpr int kStringBuilderConcatHelperPositionBits = 19;

using StringBuilderSubstringLength =
    BitField<int, 0, kStringBuilderConcatHelperLengthBits>;
using StringBuilderSubstringPosition =
    BitField<int, kStringBuilderConcatHelperLengthBits,
             kStringBuilderConcatHelperPositionBits>;

template <typename sinkchar>
30
void StringBuilderConcatHelper(String special, sinkchar* sink,
31
                               FixedArray fixed_array, int array_length);
32 33 34

// Returns the result length of the concatenation.
// On illegal argument, -1 is returned.
35
int StringBuilderConcatLength(int special_length, FixedArray fixed_array,
36
                              int array_length, bool* one_byte);
37 38 39

class FixedArrayBuilder {
 public:
40 41
  explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity);
  explicit FixedArrayBuilder(Handle<FixedArray> backing_store);
42

43 44
  bool HasCapacity(int elements);
  void EnsureCapacity(Isolate* isolate, int elements);
45

46
  void Add(Object value);
47
  void Add(Smi value);
48 49 50 51 52

  Handle<FixedArray> array() { return array_; }

  int length() { return length_; }

53
  int capacity();
54

55
  Handle<JSArray> ToJSArray(Handle<JSArray> target_array);
56 57 58 59 60 61 62 63 64 65

 private:
  Handle<FixedArray> array_;
  int length_;
  bool has_non_smi_elements_;
};

class ReplacementStringBuilder {
 public:
  ReplacementStringBuilder(Heap* heap, Handle<String> subject,
66
                           int estimated_part_count);
67 68 69

  static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
                                     int to) {
70
    DCHECK_GE(from, 0);
71
    int length = to - from;
72
    DCHECK_GT(length, 0);
73 74 75 76 77 78 79 80 81 82 83 84
    if (StringBuilderSubstringLength::is_valid(length) &&
        StringBuilderSubstringPosition::is_valid(from)) {
      int encoded_slice = StringBuilderSubstringLength::encode(length) |
                          StringBuilderSubstringPosition::encode(from);
      builder->Add(Smi::FromInt(encoded_slice));
    } else {
      // Otherwise encode as two smis.
      builder->Add(Smi::FromInt(-length));
      builder->Add(Smi::FromInt(from));
    }
  }

85
  void EnsureCapacity(int elements);
86 87 88 89 90 91

  void AddSubjectSlice(int from, int to) {
    AddSubjectSlice(&array_builder_, from, to);
    IncrementCharacterCount(to - from);
  }

92
  void AddString(Handle<String> string);
93

94
  MaybeHandle<String> ToString();
95 96 97 98 99 100 101 102 103 104 105

  void IncrementCharacterCount(int by) {
    if (character_count_ > String::kMaxLength - by) {
      STATIC_ASSERT(String::kMaxLength < kMaxInt);
      character_count_ = kMaxInt;
    } else {
      character_count_ += by;
    }
  }

 private:
106
  void AddElement(Object element);
107 108 109 110 111 112 113

  Heap* heap_;
  FixedArrayBuilder array_builder_;
  Handle<String> subject_;
  int character_count_;
  bool is_one_byte_;
};
114 115 116 117 118

class IncrementalStringBuilder {
 public:
  explicit IncrementalStringBuilder(Isolate* isolate);

119
  V8_INLINE String::Encoding CurrentEncoding() { return encoding_; }
120 121

  template <typename SrcChar, typename DestChar>
122
  V8_INLINE void Append(SrcChar c);
123

124
  V8_INLINE void AppendCharacter(uint8_t c) {
125 126 127 128 129 130 131
    if (encoding_ == String::ONE_BYTE_ENCODING) {
      Append<uint8_t, uint8_t>(c);
    } else {
      Append<uint8_t, uc16>(c);
    }
  }

132
  V8_INLINE void AppendCString(const char* s) {
133 134 135 136 137 138 139 140
    const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
    if (encoding_ == String::ONE_BYTE_ENCODING) {
      while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
    } else {
      while (*u != '\0') Append<uint8_t, uc16>(*(u++));
    }
  }

141
  V8_INLINE void AppendCString(const uc16* s) {
142 143 144 145 146 147 148
    if (encoding_ == String::ONE_BYTE_ENCODING) {
      while (*s != '\0') Append<uc16, uint8_t>(*(s++));
    } else {
      while (*s != '\0') Append<uc16, uc16>(*(s++));
    }
  }

149
  V8_INLINE bool CurrentPartCanFit(int length) {
150 151 152
    return part_length_ - current_index_ > length;
  }

153 154 155 156
  // We make a rough estimate to find out if the current string can be
  // serialized without allocating a new string part. The worst case length of
  // an escaped character is 6. Shifting the remaining string length right by 3
  // is a more pessimistic estimate, but faster to calculate.
157
  V8_INLINE int EscapedLengthIfCurrentPartFits(int length) {
158 159 160 161 162 163 164 165
    if (length > kMaxPartLength) return 0;
    STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength);
    // This shift will not overflow because length is already less than the
    // maximum part length.
    int worst_case_length = length << 3;
    return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0;
  }

166 167 168 169
  void AppendString(Handle<String> string);

  MaybeHandle<String> Finish();

170
  V8_INLINE bool HasOverflowed() const { return overflowed_; }
171

172
  int Length() const;
173

174 175 176 177 178 179 180 181 182 183 184
  // Change encoding to two-byte.
  void ChangeEncoding() {
    DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
    ShrinkCurrentPart();
    encoding_ = String::TWO_BYTE_ENCODING;
    Extend();
  }

  template <typename DestChar>
  class NoExtend {
   public:
185 186
    NoExtend(Handle<String> string, int offset,
             const DisallowHeapAllocation& no_gc) {
187 188 189
      DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString());
      if (sizeof(DestChar) == 1) {
        start_ = reinterpret_cast<DestChar*>(
190
            Handle<SeqOneByteString>::cast(string)->GetChars(no_gc) + offset);
191 192
      } else {
        start_ = reinterpret_cast<DestChar*>(
193
            Handle<SeqTwoByteString>::cast(string)->GetChars(no_gc) + offset);
194 195 196 197
      }
      cursor_ = start_;
    }

198 199
    V8_INLINE void Append(DestChar c) { *(cursor_++) = c; }
    V8_INLINE void AppendCString(const char* s) {
200 201 202 203
      const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
      while (*u != '\0') Append(*(u++));
    }

204
    int written() { return static_cast<int>(cursor_ - start_); }
205 206 207 208

   private:
    DestChar* start_;
    DestChar* cursor_;
209
    DISALLOW_HEAP_ALLOCATION(no_gc_);
210 211 212 213 214 215 216 217 218 219
  };

  template <typename DestChar>
  class NoExtendString : public NoExtend<DestChar> {
   public:
    NoExtendString(Handle<String> string, int required_length)
        : NoExtend<DestChar>(string, 0), string_(string) {
      DCHECK(string->length() >= required_length);
    }

220
    Handle<String> Finalize() {
221 222
      Handle<SeqString> string = Handle<SeqString>::cast(string_);
      int length = NoExtend<DestChar>::written();
223 224 225
      Handle<String> result = SeqString::Truncate(string, length);
      string_ = Handle<String>();
      return result;
226 227 228 229 230 231 232 233 234
    }

   private:
    Handle<String> string_;
  };

  template <typename DestChar>
  class NoExtendBuilder : public NoExtend<DestChar> {
   public:
235 236 237 238
    NoExtendBuilder(IncrementalStringBuilder* builder, int required_length,
                    const DisallowHeapAllocation& no_gc)
        : NoExtend<DestChar>(builder->current_part(), builder->current_index_,
                             no_gc),
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
          builder_(builder) {
      DCHECK(builder->CurrentPartCanFit(required_length));
    }

    ~NoExtendBuilder() {
      builder_->current_index_ += NoExtend<DestChar>::written();
    }

   private:
    IncrementalStringBuilder* builder_;
  };

 private:
  Factory* factory() { return isolate_->factory(); }

254
  V8_INLINE Handle<String> accumulator() { return accumulator_; }
255

256
  V8_INLINE void set_accumulator(Handle<String> string) {
257
    *accumulator_.location() = string->ptr();
258 259
  }

260
  V8_INLINE Handle<String> current_part() { return current_part_; }
261

262
  V8_INLINE void set_current_part(Handle<String> string) {
263
    *current_part_.location() = string->ptr();
264 265 266
  }

  // Add the current part to the accumulator.
267
  void Accumulate(Handle<String> new_part);
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305

  // Finish the current part and allocate a new part.
  void Extend();

  // Shrink current part to the right size.
  void ShrinkCurrentPart() {
    DCHECK(current_index_ < part_length_);
    set_current_part(SeqString::Truncate(
        Handle<SeqString>::cast(current_part()), current_index_));
  }

  static const int kInitialPartLength = 32;
  static const int kMaxPartLength = 16 * 1024;
  static const int kPartLengthGrowthFactor = 2;

  Isolate* isolate_;
  String::Encoding encoding_;
  bool overflowed_;
  int part_length_;
  int current_index_;
  Handle<String> accumulator_;
  Handle<String> current_part_;
};

// Stores |c| at the current index of the current part, using the one-byte or
// two-byte setter selected by sizeof(DestChar), then advances the index.
// DestChar has to agree with the builder's current encoding. Once the index
// reaches the part length, Extend() is called.
template <typename SrcChar, typename DestChar>
void IncrementalStringBuilder::Append(SrcChar c) {
  const bool dest_is_one_byte = sizeof(DestChar) == 1;
  DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, dest_is_one_byte);

  // Claim the slot first; both branches write to the same position.
  const int write_index = current_index_++;
  if (!dest_is_one_byte) {
    DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
    SeqTwoByteString::cast(*current_part_)
        ->SeqTwoByteStringSet(write_index, c);
  } else {
    DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
    SeqOneByteString::cast(*current_part_)
        ->SeqOneByteStringSet(write_index, c);
  }

  if (current_index_ != part_length_) return;
  Extend();
}
}  // namespace internal
}  // namespace v8

#endif  // V8_STRING_BUILDER_INL_H_