// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_OBJECTS_STRING_H_
#define V8_OBJECTS_STRING_H_

#include <memory>

#include "src/base/bits.h"
#include "src/base/export-template.h"
#include "src/base/strings.h"
#include "src/common/globals.h"
#include "src/objects/instance-type.h"
#include "src/objects/name.h"
#include "src/objects/smi.h"
#include "src/strings/unicode-decoder.h"
#include "torque-generated/field-offsets.h"

// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"

namespace v8 {
namespace internal {

class SharedStringAccessGuardIfNeeded;

enum InstanceType : uint16_t;

enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };
enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };

// The characteristics of a string are stored in its map.  Retrieving these
// few bits of information is moderately expensive, involving two memory
// loads where the second is dependent on the first.  To improve efficiency
// the shape of the string is given its own class so that it can be retrieved
// once and used for several string operations.  A StringShape is small enough
// to be passed by value and is immutable, but be aware that flattening a
// string can potentially alter its shape.  Also be aware that a GC caused by
// something else can alter the shape of a string due to ConsString
// shortcutting.  Keeping these restrictions in mind has proven to be error-
// prone and so we no longer put StringShapes in variables unless there is a
// concrete performance benefit at that particular point in the code.
class StringShape {
 public:
  inline explicit StringShape(const String s);
  inline explicit StringShape(const String s, PtrComprCageBase cage_base);
  inline explicit StringShape(Map s);
  inline explicit StringShape(InstanceType t);
  inline bool IsSequential() const;
  inline bool IsExternal() const;
  inline bool IsCons() const;
  inline bool IsSliced() const;
  inline bool IsThin() const;
  inline bool IsIndirect() const;
  inline bool IsUncachedExternal() const;
  inline bool IsExternalOneByte() const;
  inline bool IsExternalTwoByte() const;
  inline bool IsSequentialOneByte() const;
  inline bool IsSequentialTwoByte() const;
  inline bool IsInternalized() const;
  inline StringRepresentationTag representation_tag() const;
  inline uint32_t encoding_tag() const;
  inline uint32_t full_representation_tag() const;
#ifdef DEBUG
  inline uint32_t type() const { return type_; }
  inline void invalidate() { valid_ = false; }
  inline bool valid() const { return valid_; }
#else
  inline void invalidate() {}
#endif

  // Run different behavior for each concrete string class type, as defined by
  // the dispatcher.
  template <typename TDispatcher, typename TResult, typename... TArgs>
  inline TResult DispatchToSpecificTypeWithoutCast(TArgs&&... args);
  template <typename TDispatcher, typename TResult, typename... TArgs>
  inline TResult DispatchToSpecificType(String str, TArgs&&... args);

 private:
  uint32_t type_;
#ifdef DEBUG
  inline void set_valid() { valid_ = true; }
  bool valid_;
#else
  inline void set_valid() {}
#endif
};

#include "torque-generated/src/objects/string-tq.inc"

// The String abstract class captures JavaScript string values:
//
// Ecma-262:
//  4.3.16 String Value
//    A string value is a member of the type String and is a finite
//    ordered sequence of zero or more 16-bit unsigned integer values.
//
// All string values have a length field.
class String : public TorqueGeneratedString<String, Name> {
 public:
  enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };

  // Representation of the flat content of a String.
  // A non-flat string doesn't have flat content.
  // A flat string has content that's encoded as a sequence of either
  // one-byte chars or two-byte UC16.
  // Returned by String::GetFlatContent().
  // Not safe to use from concurrent background threads.
  // TODO(solanes): Move FlatContent into FlatStringReader, and make it private.
  // This would de-duplicate code, as well as taking advantage of the fact that
  // FlatStringReader is relocatable.
  class FlatContent {
   public:
    // Returns true if the string is flat and this structure contains content.
    bool IsFlat() const { return state_ != NON_FLAT; }
    // Returns true if the structure contains one-byte content.
    bool IsOneByte() const { return state_ == ONE_BYTE; }
    // Returns true if the structure contains two-byte content.
    bool IsTwoByte() const { return state_ == TWO_BYTE; }

    // Return the one byte content of the string. Only use if IsOneByte()
    // returns true.
    base::Vector<const uint8_t> ToOneByteVector() const {
      DCHECK_EQ(ONE_BYTE, state_);
      return base::Vector<const uint8_t>(onebyte_start, length_);
    }
    // Return the two-byte content of the string. Only use if IsTwoByte()
    // returns true.
    base::Vector<const base::uc16> ToUC16Vector() const {
      DCHECK_EQ(TWO_BYTE, state_);
      return base::Vector<const base::uc16>(twobyte_start, length_);
    }

    base::uc16 Get(int i) const {
      DCHECK(i < length_);
      DCHECK(state_ != NON_FLAT);
      if (state_ == ONE_BYTE) return onebyte_start[i];
      return twobyte_start[i];
    }

    bool UsesSameString(const FlatContent& other) const {
      return onebyte_start == other.onebyte_start;
    }

   private:
    enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };

    // Constructors only used by String::GetFlatContent().
    FlatContent(const uint8_t* start, int length,
                const DisallowGarbageCollection& no_gc)
        : onebyte_start(start),
          length_(length),
          state_(ONE_BYTE),
          no_gc_(no_gc) {}
    FlatContent(const base::uc16* start, int length,
                const DisallowGarbageCollection& no_gc)
        : twobyte_start(start),
          length_(length),
          state_(TWO_BYTE),
          no_gc_(no_gc) {}
    explicit FlatContent(const DisallowGarbageCollection& no_gc)
        : onebyte_start(nullptr), length_(0), state_(NON_FLAT), no_gc_(no_gc) {}

    union {
      const uint8_t* onebyte_start;
      const base::uc16* twobyte_start;
    };
    int length_;
    State state_;
    const DisallowGarbageCollection& no_gc_;

    friend class String;
    friend class IterableSubString;
  };

  template <typename IsolateT>
  void MakeThin(IsolateT* isolate, String canonical);

  template <typename Char>
  V8_INLINE base::Vector<const Char> GetCharVector(
      const DisallowGarbageCollection& no_gc);

  // Get chars from sequential or external strings. May only be called when a
  // SharedStringAccessGuard is not needed (i.e. on the main thread or on
  // read-only strings).
  template <typename Char>
  inline const Char* GetChars(const DisallowGarbageCollection& no_gc) const;

  // Get chars from sequential or external strings.
  template <typename Char>
  inline const Char* GetChars(
      const DisallowGarbageCollection& no_gc,
      const SharedStringAccessGuardIfNeeded& access_guard) const;

  // Returns the address of the character at an offset into this string.
  // Requires: this->IsFlat()
  const byte* AddressOfCharacterAt(int start_index,
                                   const DisallowGarbageCollection& no_gc);

  // Forward declare the non-atomic (set_)length defined in torque.
  using TorqueGeneratedString::length;
  using TorqueGeneratedString::set_length;
  DECL_RELEASE_ACQUIRE_INT_ACCESSORS(length)

  // Returns whether this string has only one-byte chars, i.e. all of them can
  // be one-byte encoded.  This might be the case even if the string is
  // two-byte.  Such strings may appear when the embedder prefers
  // two-byte external representations even for one-byte data.
  DECL_GETTER(IsOneByteRepresentation, bool)
  DECL_GETTER(IsTwoByteRepresentation, bool)

  // Cons and slices have an encoding flag that may not represent the actual
  // encoding of the underlying string.  This is taken into account here.
  // This function is static because that helps it get inlined.
  // Requires: string.IsFlat()
  static inline bool IsOneByteRepresentationUnderneath(String string);

  // Get and set individual two byte chars in the string.
  inline void Set(int index, uint16_t value);
  // Get individual two byte char in the string.  Repeated calls
  // to this method are not efficient unless the string is flat.
  // If it is called from a background thread, the LocalIsolate version should
  // be used.
  V8_INLINE uint16_t Get(int index) const;
  V8_INLINE uint16_t Get(int index, Isolate* isolate) const;
  V8_INLINE uint16_t Get(int index, LocalIsolate* local_isolate) const;
  // Method to pass down the access_guard. Useful for recursive calls such as
  // ThinStrings where we go String::Get into ThinString::Get into String::Get
  // again for the internalized string.
  V8_INLINE uint16_t
  Get(int index, PtrComprCageBase cage_base,
      const SharedStringAccessGuardIfNeeded& access_guard) const;

  // ES6 section 7.1.3.1 ToNumber Applied to the String Type
  static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);

  // Flattens the string.  Checks first inline to see if it is
  // necessary.  Does nothing if the string is not a cons string.
  // Flattening allocates a sequential string with the same data as
  // the given string and mutates the cons string to a degenerate
  // form, where the first component is the new sequential string and
  // the second component is the empty string.  If allocation fails,
  // this function returns a failure.  If flattening succeeds, this
  // function returns the sequential string that is now the first
  // component of the cons string.
  //
  // Degenerate cons strings are handled specially by the garbage
  // collector (see IsShortcutCandidate).

  static inline Handle<String> Flatten(
      Isolate* isolate, Handle<String> string,
      AllocationType allocation = AllocationType::kYoung);
  static inline Handle<String> Flatten(
      LocalIsolate* isolate, Handle<String> string,
      AllocationType allocation = AllocationType::kYoung);

  // Tries to return the content of a flat string as a structure holding either
  // a flat vector of char or of base::uc16.
  // If the string isn't flat, and therefore doesn't have flat content, the
  // returned structure will report so, and can't provide a vector of either
  // kind.
  V8_EXPORT_PRIVATE FlatContent
  GetFlatContent(const DisallowGarbageCollection& no_gc);

  // Returns the parent of a sliced string or first part of a flat cons string.
  // Requires: StringShape(this).IsIndirect() && this->IsFlat()
  inline String GetUnderlying() const;

  // String relational comparison, implemented according to ES6 section 7.2.11
  // Abstract Relational Comparison (step 5): The comparison of Strings uses a
  // simple lexicographic ordering on sequences of code unit values. There is no
  // attempt to use the more complex, semantically oriented definitions of
  // character or string equality and collating order defined in the Unicode
  // specification. Therefore String values that are canonically equal according
  // to the Unicode standard could test as unequal. In effect this algorithm
  // assumes that both Strings are already in normalized form. Also, note that
  // for strings containing supplementary characters, lexicographic ordering on
  // sequences of UTF-16 code unit values differs from that on sequences of code
  // point values.
  V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
                                                        Handle<String> x,
                                                        Handle<String> y);

  // Perform ES6 21.1.3.8, including checking arguments.
  static Object IndexOf(Isolate* isolate, Handle<Object> receiver,
                        Handle<Object> search, Handle<Object> position);
  // Perform string match of pattern on subject, starting at start index.
  // Caller must ensure that 0 <= start_index <= sub->length(), as this does not
  // check any arguments.
  static int IndexOf(Isolate* isolate, Handle<String> receiver,
                     Handle<String> search, int start_index);

  static Object LastIndexOf(Isolate* isolate, Handle<Object> receiver,
                            Handle<Object> search, Handle<Object> position);

  // Encapsulates logic related to a match and its capture groups as required
  // by GetSubstitution.
  class Match {
   public:
    virtual Handle<String> GetMatch() = 0;
    virtual Handle<String> GetPrefix() = 0;
    virtual Handle<String> GetSuffix() = 0;

    // A named capture can be unmatched (either not specified in the pattern,
    // or specified but unmatched in the current string), or matched.
    enum CaptureState { UNMATCHED, MATCHED };

    virtual int CaptureCount() = 0;
    virtual bool HasNamedCaptures() = 0;
    virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
    virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
                                                CaptureState* state) = 0;

    virtual ~Match() = default;
  };

  // ES#sec-getsubstitution
  // GetSubstitution(matched, str, position, captures, replacement)
  // Expand the $-expressions in the string and return a new string with
  // the result.
  // A {start_index} can be passed to specify where to start scanning the
  // replacement string.
  V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
      Isolate* isolate, Match* match, Handle<String> replacement,
      int start_index = 0);

  // String equality operations.
  inline bool Equals(String other) const;
  inline static bool Equals(Isolate* isolate, Handle<String> one,
                            Handle<String> two);

  enum class EqualityType { kWholeString, kPrefix, kNoLengthCheck };

  // Check if this string matches the given vector of characters, either as a
  // whole string or just a prefix.
  //
  // The Isolate is passed as "evidence" that this call is on the main thread,
  // and to distiguish from the LocalIsolate overload.
  template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
  inline bool IsEqualTo(base::Vector<const Char> str, Isolate* isolate) const;

  // Check if this string matches the given vector of characters, either as a
  // whole string or just a prefix.
  //
  // This is main-thread only, like the Isolate* overload, but additionally
  // computes the PtrComprCageBase for IsEqualToImpl.
  template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
  inline bool IsEqualTo(base::Vector<const Char> str) const;

  // Check if this string matches the given vector of characters, either as a
  // whole string or just a prefix.
  //
  // The LocalIsolate is passed to provide access to the string access lock,
  // which is taken when reading the string's contents on a background thread.
  template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
  inline bool IsEqualTo(base::Vector<const Char> str,
                        LocalIsolate* isolate) const;

  V8_EXPORT_PRIVATE bool HasOneBytePrefix(base::Vector<const char> str);
  V8_EXPORT_PRIVATE inline bool IsOneByteEqualTo(base::Vector<const char> str);

  // Return a UTF8 representation of the string.  The string is null
  // terminated but may optionally contain nulls.  Length is returned
  // in length_output if length_output is not a null pointer  The string
  // should be nearly flat, otherwise the performance of this method may
  // be very slow (quadratic in the length).  Setting robustness_flag to
  // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust  This means it
  // handles unexpected data without causing assert failures and it does not
  // do any heap allocations.  This is useful when printing stack traces.
  std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
                                    RobustnessFlag robustness_flag, int offset,
                                    int length, int* length_output = nullptr);
  V8_EXPORT_PRIVATE std::unique_ptr<char[]> ToCString(
      AllowNullsFlag allow_nulls = DISALLOW_NULLS,
      RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
      int* length_output = nullptr);

  // Externalization.
  V8_EXPORT_PRIVATE bool MakeExternal(
      v8::String::ExternalStringResource* resource);
  V8_EXPORT_PRIVATE bool MakeExternal(
      v8::String::ExternalOneByteStringResource* resource);
  bool SupportsExternalization();

  // Conversion.
  // "array index": an index allowed by the ES spec for JSArrays.
  inline bool AsArrayIndex(uint32_t* index);

  // This is used for calculating array indices but differs from an
  // Array Index in the regard that this does not support the full
  // array index range. This only supports positive numbers less than
  // or equal to INT_MAX.
  //
  // String::AsArrayIndex might be a better fit if you're looking to
  // calculate the array index.
  //
  // if val < 0 or val > INT_MAX, returns -1
  // if 0 <= val <= INT_MAX, returns val
  static int32_t ToArrayIndex(Address addr);

  // "integer index": the string is the decimal representation of an
  // integer in the range of a size_t. Useful for TypedArray accesses.
  inline bool AsIntegerIndex(size_t* index);

  // Trimming.
  enum TrimMode { kTrim, kTrimStart, kTrimEnd };

  V8_EXPORT_PRIVATE void PrintOn(FILE* out);
  V8_EXPORT_PRIVATE void PrintOn(std::ostream& out);

  // For use during stack traces.  Performs rudimentary sanity check.
  bool LooksValid();

  // Printing utility functions.
  // - PrintUC16 prints the raw string contents to the given stream.
  //   Non-printable characters are formatted as hex, but otherwise the string
  //   is printed as-is.
  // - StringShortPrint and StringPrint have extra formatting: they add a
  //   prefix and suffix depending on the string kind, may add other information
  //   such as the string heap object address, may truncate long strings, etc.
  const char* PrefixForDebugPrint() const;
  const char* SuffixForDebugPrint() const;
  void StringShortPrint(StringStream* accumulator);
  void PrintUC16(std::ostream& os, int start = 0, int end = -1);
  void PrintUC16(StringStream* accumulator, int start, int end);

  // Dispatched behavior.
#if defined(DEBUG) || defined(OBJECT_PRINT)
  char* ToAsciiArray();
#endif
  DECL_PRINTER(String)
  DECL_VERIFIER(String)

  inline bool IsFlat() const;
  inline bool IsFlat(PtrComprCageBase cage_base) const;

  // Max char codes.
  static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
  static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
  static const int kMaxUtf16CodeUnit = 0xffff;
  static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
  static const base::uc32 kMaxCodePoint = 0x10ffff;

  // Maximal string length.
  // The max length is different on 32 and 64 bit platforms. Max length for
  // 32-bit platforms is ~268.4M chars. On 64-bit platforms, max length is
  // ~536.8M chars.
  // See include/v8.h for the definition.
  static const int kMaxLength = v8::String::kMaxLength;
  // There are several defining limits imposed by our current implementation:
  // - any string's length must fit into a Smi.
  static_assert(kMaxLength <= kSmiMaxValue,
                "String length must fit into a Smi");
  // - adding two string lengths must still fit into a 32-bit int without
  //   overflow
  static_assert(kMaxLength * 2 <= kMaxInt,
                "String::kMaxLength * 2 must fit into an int32");
  // - any heap object's size in bytes must be able to fit into a Smi, because
  //   its space on the heap might be filled with a Filler; for strings this
  //   means SeqTwoByteString::kMaxSize must be able to fit into a Smi.
  static_assert(kMaxLength * 2 + kHeaderSize <= kSmiMaxValue,
                "String object size in bytes must fit into a Smi");
  // - any heap object's size in bytes must be able to fit into an int, because
  //   that's what our object handling code uses almost everywhere.
  static_assert(kMaxLength * 2 + kHeaderSize <= kMaxInt,
                "String object size in bytes must fit into an int");

  // Max length for computing hash. For strings longer than this limit the
  // string length is used as the hash value.
  static const int kMaxHashCalcLength = 16383;

  // Limit for truncation in short printing.
  static const int kMaxShortPrintLength = 1024;

  // Helper function for flattening strings.
  template <typename sinkchar>
  EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
  static void WriteToFlat(String source, sinkchar* sink, int from, int to);
  template <typename sinkchar>
  static void WriteToFlat(String source, sinkchar* sink, int from, int to,
                          PtrComprCageBase cage_base,
                          const SharedStringAccessGuardIfNeeded&);

  static inline bool IsAscii(const char* chars, int length) {
    return IsAscii(reinterpret_cast<const uint8_t*>(chars), length);
  }

  static inline bool IsAscii(const uint8_t* chars, int length) {
    return NonAsciiStart(chars, length) >= length;
  }

  static inline int NonOneByteStart(const base::uc16* chars, int length) {
    DCHECK(IsAligned(reinterpret_cast<Address>(chars), sizeof(base::uc16)));
    const uint16_t* start = chars;
    const uint16_t* limit = chars + length;

    if (static_cast<size_t>(length) >= kUIntptrSize) {
      // Check unaligned chars.
      while (!IsAligned(reinterpret_cast<Address>(chars), kUIntptrSize)) {
        if (*chars > unibrow::Latin1::kMaxChar) {
          return static_cast<int>(chars - start);
        }
        ++chars;
      }

      // Check aligned words.
      STATIC_ASSERT(unibrow::Latin1::kMaxChar == 0xFF);
#ifdef V8_TARGET_LITTLE_ENDIAN
      const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0xFF00;
#else
      const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0x00FF;
#endif
      while (chars + sizeof(uintptr_t) <= limit) {
        if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
          break;
        }
        chars += (sizeof(uintptr_t) / sizeof(base::uc16));
      }
    }

    // Check remaining unaligned chars, or find non-one-byte char in word.
    while (chars < limit) {
      if (*chars > unibrow::Latin1::kMaxChar) {
        return static_cast<int>(chars - start);
      }
      ++chars;
    }

    return static_cast<int>(chars - start);
  }

  static inline bool IsOneByte(const base::uc16* chars, int length) {
    return NonOneByteStart(chars, length) >= length;
  }

  // May only be called when a SharedStringAccessGuard is not needed (i.e. on
  // the main thread or on read-only strings).
  template <class Visitor>
  static inline ConsString VisitFlat(Visitor* visitor, String string,
                                     int offset = 0);

  template <class Visitor>
  static inline ConsString VisitFlat(
      Visitor* visitor, String string, int offset,
      const SharedStringAccessGuardIfNeeded& access_guard);

  template <typename IsolateT>
  static Handle<FixedArray> CalculateLineEnds(IsolateT* isolate,
                                              Handle<String> string,
                                              bool include_ending_line);

 private:
  friend class Name;
  friend class StringTableInsertionKey;
  friend class InternalizedStringKey;

  // Implementation of the Get() public methods. Do not use directly.
  V8_INLINE uint16_t
  GetImpl(int index, PtrComprCageBase cage_base,
          const SharedStringAccessGuardIfNeeded& access_guard) const;

  // Implementation of the IsEqualTo() public methods. Do not use directly.
  template <EqualityType kEqType, typename Char>
  V8_INLINE bool IsEqualToImpl(
      base::Vector<const Char> str, PtrComprCageBase cage_base,
      const SharedStringAccessGuardIfNeeded& access_guard) const;

  // Out-of-line IsEqualToImpl for ConsString.
  template <typename Char>
  V8_NOINLINE static bool IsConsStringEqualToImpl(
      ConsString string, int slice_offset, base::Vector<const Char> str,
      PtrComprCageBase cage_base,
      const SharedStringAccessGuardIfNeeded& access_guard);

  V8_EXPORT_PRIVATE static Handle<String> SlowFlatten(
      Isolate* isolate, Handle<ConsString> cons, AllocationType allocation);

  // Slow case of String::Equals.  This implementation works on any strings
  // but it is most efficient on strings that are almost flat.
  V8_EXPORT_PRIVATE bool SlowEquals(String other) const;
  V8_EXPORT_PRIVATE bool SlowEquals(
      String other, const SharedStringAccessGuardIfNeeded&) const;

  V8_EXPORT_PRIVATE static bool SlowEquals(Isolate* isolate, Handle<String> one,
                                           Handle<String> two);

  // Slow case of AsArrayIndex.
  V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
  V8_EXPORT_PRIVATE bool SlowAsIntegerIndex(size_t* index);

  // Compute and set the hash code.
  V8_EXPORT_PRIVATE uint32_t ComputeAndSetHash();
  V8_EXPORT_PRIVATE uint32_t
  ComputeAndSetHash(const SharedStringAccessGuardIfNeeded&);

  TQ_OBJECT_CONSTRUCTORS(String)
};

// clang-format off
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
void String::WriteToFlat(String source, uint8_t* sink, int from, int to);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
void String::WriteToFlat(String source, uint16_t* sink, int from, int to);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
void String::WriteToFlat(String source, uint8_t* sink, int from, int to,
                         PtrComprCageBase cage_base,
                         const SharedStringAccessGuardIfNeeded&);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
void String::WriteToFlat(String source, uint16_t* sink, int from, int to,
                         PtrComprCageBase cage_base,
                         const SharedStringAccessGuardIfNeeded&);
// clang-format on

class SubStringRange {
 public:
  inline SubStringRange(String string, const DisallowGarbageCollection& no_gc,
                        int first = 0, int length = -1);
  class iterator;
  inline iterator begin();
  inline iterator end();

 private:
  String string_;
  int first_;
  int length_;
  const DisallowGarbageCollection& no_gc_;
};

// The SeqString abstract class captures sequential string values.
class SeqString : public TorqueGeneratedSeqString<SeqString, String> {
 public:
  // Truncate the string in-place if possible and return the result.
  // In case of new_length == 0, the empty string is returned without
  // truncating the original string.
  V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
                                                       int new_length);

  TQ_OBJECT_CONSTRUCTORS(SeqString)
};

class InternalizedString
    : public TorqueGeneratedInternalizedString<InternalizedString, String> {
 public:
  // TODO(neis): Possibly move some stuff from String here.

  TQ_OBJECT_CONSTRUCTORS(InternalizedString)
};

// The OneByteString class captures sequential one-byte string objects.
// Each character in the OneByteString is an one-byte character.
class SeqOneByteString
    : public TorqueGeneratedSeqOneByteString<SeqOneByteString, SeqString> {
 public:
  static const bool kHasOneByteEncoding = true;
  using Char = uint8_t;

  // Dispatched behavior. The non SharedStringAccessGuardIfNeeded method is also
  // defined for convenience and it will check that the access guard is not
  // needed.
  inline uint8_t Get(int index) const;
  inline uint8_t Get(int index, PtrComprCageBase cage_base,
                     const SharedStringAccessGuardIfNeeded& access_guard) const;
  inline void SeqOneByteStringSet(int index, uint16_t value);

  // Get the address of the characters in this string.
  inline Address GetCharsAddress() const;

  // Get a pointer to the characters of the string. May only be called when a
  // SharedStringAccessGuard is not needed (i.e. on the main thread or on
  // read-only strings).
  inline uint8_t* GetChars(const DisallowGarbageCollection& no_gc) const;

  // Get a pointer to the characters of the string.
  inline uint8_t* GetChars(
      const DisallowGarbageCollection& no_gc,
      const SharedStringAccessGuardIfNeeded& access_guard) const;

  // Clear uninitialized padding space. This ensures that the snapshot content
  // is deterministic.
  void clear_padding();

  // Garbage collection support.  This method is called by the
  // garbage collector to compute the actual size of an OneByteString
  // instance.
  inline int SeqOneByteStringSize(InstanceType instance_type);

  // Maximal memory usage for a single sequential one-byte string.
  static const int kMaxCharsSize = kMaxLength;
  static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
  STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);

  int AllocatedSize();

  class BodyDescriptor;

  TQ_OBJECT_CONSTRUCTORS(SeqOneByteString)
};

// The TwoByteString class captures sequential unicode string objects.
// Each character in the TwoByteString is a two-byte uint16_t.
class SeqTwoByteString
    : public TorqueGeneratedSeqTwoByteString<SeqTwoByteString, SeqString> {
 public:
  static const bool kHasOneByteEncoding = false;
  using Char = uint16_t;

  // Dispatched behavior.
  inline uint16_t Get(
      int index, PtrComprCageBase cage_base,
      const SharedStringAccessGuardIfNeeded& access_guard) const;
  inline void SeqTwoByteStringSet(int index, uint16_t value);

  // Get the address of the characters in this string.
  inline Address GetCharsAddress() const;

  // Get a pointer to the characters of the string. May only be called when a
  // SharedStringAccessGuard is not needed (i.e. on the main thread or on
  // read-only strings).
  inline base::uc16* GetChars(const DisallowGarbageCollection& no_gc) const;

  // Get a pointer to the characters of the string.
  inline base::uc16* GetChars(
      const DisallowGarbageCollection& no_gc,
      const SharedStringAccessGuardIfNeeded& access_guard) const;

  // Clear uninitialized padding space. This ensures that the snapshot content
  // is deterministic.
  void clear_padding();

  // Garbage collection support.  This method is called by the
  // garbage collector to compute the actual size of a TwoByteString
  // instance.
  inline int SeqTwoByteStringSize(InstanceType instance_type);

  // Maximal memory usage for a single sequential two-byte string.
  static const int kMaxCharsSize = kMaxLength * 2;
  static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
  STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
                String::kMaxLength);

  int AllocatedSize();

  class BodyDescriptor;

  TQ_OBJECT_CONSTRUCTORS(SeqTwoByteString)
};

// The ConsString class describes string values built by using the
// addition operator on strings.  A ConsString is a pair where the
// first and second components are pointers to other string values.
// One or both components of a ConsString can be pointers to other
// ConsStrings, creating a binary tree of ConsStrings where the leaves
// are non-ConsString string values.  The string value represented by
// a ConsString can be obtained by concatenating the leaf string
// values in a left-to-right depth-first traversal of the tree.
class ConsString : public TorqueGeneratedConsString<ConsString, String> {
 public:
  // Doesn't check that the result is a string, even in debug mode.  This is
  // useful during GC where the mark bits confuse the checks.
  inline Object unchecked_first();

  // Doesn't check that the result is a string, even in debug mode.  This is
  // useful during GC where the mark bits confuse the checks.
  inline Object unchecked_second();

  // Dispatched behavior.
  V8_EXPORT_PRIVATE uint16_t
  Get(int index, PtrComprCageBase cage_base,
      const SharedStringAccessGuardIfNeeded& access_guard) const;

  // Minimum length for a cons string.
  static const int kMinLength = 13;

  class BodyDescriptor;

  DECL_VERIFIER(ConsString)

  TQ_OBJECT_CONSTRUCTORS(ConsString)
};

// The ThinString class describes string objects that are just references
// to another string object. They are used for in-place internalization when
// the original string cannot actually be internalized in-place: in these
// cases, the original string is converted to a ThinString pointing at its
// internalized version (which is allocated as a new object).
// In terms of memory layout and most algorithms operating on strings,
// ThinStrings can be thought of as "one-part cons strings".
class ThinString : public TorqueGeneratedThinString<ThinString, String> {
 public:
  DECL_GETTER(unchecked_actual, HeapObject)

  V8_EXPORT_PRIVATE uint16_t
  Get(int index, PtrComprCageBase cage_base,
      const SharedStringAccessGuardIfNeeded& access_guard) const;

  DECL_VERIFIER(ThinString)

  class BodyDescriptor;

  TQ_OBJECT_CONSTRUCTORS(ThinString)
};

// The Sliced String class describes strings that are substrings of another
// sequential string.  The motivation is to save time and memory when creating
// a substring.  A Sliced String is described as a pointer to the parent,
// the offset from the start of the parent string and the length.  Using
// a Sliced String therefore requires unpacking of the parent string and
// adding the offset to the start address.  A substring of a Sliced String
// are not nested since the double indirection is simplified when creating
// such a substring.
// Currently missing features are:
//  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
class SlicedString : public TorqueGeneratedSlicedString<SlicedString, String> {
 public:
  inline void set_parent(String parent,
                         WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
  // Dispatched behavior.
  V8_EXPORT_PRIVATE uint16_t
  Get(int index, PtrComprCageBase cage_base,
      const SharedStringAccessGuardIfNeeded& access_guard) const;

  // Minimum length for a sliced string.
  static const int kMinLength = 13;

  class BodyDescriptor;

  DECL_VERIFIER(SlicedString)

  TQ_OBJECT_CONSTRUCTORS(SlicedString)
};

// The ExternalString class describes string values that are backed by
// a string resource that lies outside the V8 heap.  ExternalStrings
// consist of the length field common to all strings, a pointer to the
// external resource.  It is important to ensure (externally) that the
// resource is not deallocated while the ExternalString is live in the
// V8 heap.
//
// The API expects that all ExternalStrings are created through the
// API.  Therefore, ExternalStrings should not be used internally.
class ExternalString
    : public TorqueGeneratedExternalString<ExternalString, String> {
 public:
  DECL_VERIFIER(ExternalString)

  // Size of uncached external strings.
  static const int kUncachedSize =
      kResourceOffset + FIELD_SIZE(kResourceOffset);

  inline void AllocateExternalPointerEntries(Isolate* isolate);

  // Return whether the external string data pointer is not cached.
  inline bool is_uncached() const;
  // Size in bytes of the external payload.
  int ExternalPayloadSize() const;

  // Used in the serializer/deserializer.
  DECL_GETTER(resource_as_address, Address)
  inline void set_address_as_resource(Isolate* isolate, Address address);
  inline uint32_t GetResourceRefForDeserialization();
  inline void SetResourceRefForSerialization(uint32_t ref);

  // Disposes string's resource object if it has not already been disposed.
  inline void DisposeResource(Isolate* isolate);

  STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
  static const int kSizeOfAllExternalStrings = kHeaderSize;

 private:
  // Hide generated accessors.
  DECL_ACCESSORS(resource, void*)
  DECL_ACCESSORS(resource_data, void*)

  TQ_OBJECT_CONSTRUCTORS(ExternalString)
};

// The ExternalOneByteString class is an external string backed by an
// one-byte string.
class ExternalOneByteString
    : public TorqueGeneratedExternalOneByteString<ExternalOneByteString,
                                                  ExternalString> {
 public:
  static const bool kHasOneByteEncoding = true;

  using Resource = v8::String::ExternalOneByteStringResource;

  // The underlying resource.
  DECL_GETTER(resource, const Resource*)

  // It is assumed that the previous resource is null. If it is not null, then
  // it is the responsability of the caller the handle the previous resource.
  inline void SetResource(Isolate* isolate, const Resource* buffer);

  // Used only during serialization.
  inline void set_resource(Isolate* isolate, const Resource* buffer);

  // Update the pointer cache to the external character array.
  // The cached pointer is always valid, as the external character array does =
  // not move during lifetime.  Deserialization is the only exception, after
  // which the pointer cache has to be refreshed.
  inline void update_data_cache(Isolate* isolate);

  inline const uint8_t* GetChars() const;

  // Dispatched behavior.
  inline uint8_t Get(int index, PtrComprCageBase cage_base,
                     const SharedStringAccessGuardIfNeeded& access_guard) const;

  class BodyDescriptor;

  STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);

  TQ_OBJECT_CONSTRUCTORS(ExternalOneByteString)

 private:
  // The underlying resource as a non-const pointer.
  DECL_GETTER(mutable_resource, Resource*)
};

// The ExternalTwoByteString class is an external string backed by a UTF-16
// encoded string.
class ExternalTwoByteString
    : public TorqueGeneratedExternalTwoByteString<ExternalTwoByteString,
                                                  ExternalString> {
 public:
  static const bool kHasOneByteEncoding = false;

  using Resource = v8::String::ExternalStringResource;

  // The underlying string resource.
  DECL_GETTER(resource, const Resource*)

  // It is assumed that the previous resource is null. If it is not null, then
  // it is the responsability of the caller the handle the previous resource.
  inline void SetResource(Isolate* isolate, const Resource* buffer);

  // Used only during serialization.
  inline void set_resource(Isolate* isolate, const Resource* buffer);

  // Update the pointer cache to the external character array.
  // The cached pointer is always valid, as the external character array does =
  // not move during lifetime.  Deserialization is the only exception, after
  // which the pointer cache has to be refreshed.
  inline void update_data_cache(Isolate* isolate);

  inline const uint16_t* GetChars() const;

  // Dispatched behavior.
  inline uint16_t Get(
      int index, PtrComprCageBase cage_base,
      const SharedStringAccessGuardIfNeeded& access_guard) const;

  // For regexp code.
  inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);

  class BodyDescriptor;

  STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);

  TQ_OBJECT_CONSTRUCTORS(ExternalTwoByteString)

 private:
  // The underlying resource as a non-const pointer.
  DECL_GETTER(mutable_resource, Resource*)
};

// A flat string reader provides random access to the contents of a
// string independent of the character width of the string. The handle
// must be valid as long as the reader is being used.
// Not safe to use from concurrent background threads.
class V8_EXPORT_PRIVATE FlatStringReader : public Relocatable {
 public:
  FlatStringReader(Isolate* isolate, Handle<String> str);
  void PostGarbageCollection() override;
  inline base::uc32 Get(int index) const;
  template <typename Char>
  inline Char Get(int index) const;
  int length() { return length_; }

 private:
  Handle<String> str_;
  bool is_one_byte_;
  int length_;
  const void* start_;
};

// This maintains an off-stack representation of the stack frames required
// to traverse a ConsString, allowing an entirely iterative and restartable
// traversal of the entire string
class ConsStringIterator {
 public:
  inline ConsStringIterator() = default;
  inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) {
    Reset(cons_string, offset);
  }
  ConsStringIterator(const ConsStringIterator&) = delete;
  ConsStringIterator& operator=(const ConsStringIterator&) = delete;
  inline void Reset(ConsString cons_string, int offset = 0) {
    depth_ = 0;
    // Next will always return nullptr.
    if (cons_string.is_null()) return;
    Initialize(cons_string, offset);
  }
  // Returns nullptr when complete.
  inline String Next(int* offset_out) {
    *offset_out = 0;
    if (depth_ == 0) return String();
    return Continue(offset_out);
  }

 private:
  static const int kStackSize = 32;
  // Use a mask instead of doing modulo operations for stack wrapping.
  static const int kDepthMask = kStackSize - 1;
  static_assert(base::bits::IsPowerOfTwo(kStackSize),
                "kStackSize must be power of two");
  static inline int OffsetForDepth(int depth);

  inline void PushLeft(ConsString string);
  inline void PushRight(ConsString string);
  inline void AdjustMaximumDepth();
  inline void Pop();
  inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
  V8_EXPORT_PRIVATE void Initialize(ConsString cons_string, int offset);
  V8_EXPORT_PRIVATE String Continue(int* offset_out);
  String NextLeaf(bool* blew_stack);
  String Search(int* offset_out);

  // Stack must always contain only frames for which right traversal
  // has not yet been performed.
  ConsString frames_[kStackSize];
  ConsString root_;
  int depth_;
  int maximum_depth_;
  int consumed_;
};

class StringCharacterStream;

template <typename Char>
struct CharTraits;

template <>
struct CharTraits<uint8_t> {
  using String = SeqOneByteString;
  using ExternalString = ExternalOneByteString;
};

template <>
struct CharTraits<uint16_t> {
  using String = SeqTwoByteString;
  using ExternalString = ExternalTwoByteString;
};

}  // namespace internal
}  // namespace v8

#include "src/objects/object-macros-undef.h"

#endif  // V8_OBJECTS_STRING_H_