builtins-regexp-gen.cc 58.4 KB
Newer Older
1 2 3 4 5 6
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/builtins/builtins-regexp-gen.h"

7
#include "src/builtins/builtins-constructor-gen.h"
8 9
#include "src/builtins/builtins-utils-gen.h"
#include "src/builtins/builtins.h"
10
#include "src/builtins/growable-fixed-array-gen.h"
11 12
#include "src/codegen/code-factory.h"
#include "src/codegen/code-stub-assembler.h"
13
#include "src/codegen/macro-assembler.h"
14
#include "src/execution/protectors.h"
15
#include "src/heap/factory-inl.h"
16
#include "src/logging/counters.h"
17
#include "src/objects/js-regexp-string-iterator.h"
18
#include "src/objects/js-regexp.h"
19
#include "src/objects/regexp-match-info.h"
20
#include "src/regexp/regexp.h"
21 22 23 24 25 26

namespace v8 {
namespace internal {

using compiler::Node;

27 28 29 30 31 32 33 34
// Tail calls the regular expression interpreter.
// static
void Builtins::Generate_RegExpInterpreterTrampoline(MacroAssembler* masm) {
  ExternalReference interpreter_code_entry =
      ExternalReference::re_match_for_call_from_js(masm->isolate());
  masm->Jump(interpreter_code_entry);
}

35 36 37 38 39 40
TNode<Smi> RegExpBuiltinsAssembler::SmiZero() { return SmiConstant(0); }

TNode<IntPtrT> RegExpBuiltinsAssembler::IntPtrZero() {
  return IntPtrConstant(0);
}

41 42 43 44 45 46
// If code is a builtin, return the address to the (possibly embedded) builtin
// code entry, otherwise return the entry of the code object itself.
TNode<RawPtrT> RegExpBuiltinsAssembler::LoadCodeObjectEntry(TNode<Code> code) {
  TVARIABLE(RawPtrT, var_result);

  Label if_code_is_off_heap(this), out(this);
47 48
  TNode<Int32T> builtin_index =
      LoadObjectField<Int32T>(code, Code::kBuiltinIndexOffset);
49
  {
50 51
    GotoIfNot(Word32Equal(builtin_index, Int32Constant(Builtins::kNoBuiltinId)),
              &if_code_is_off_heap);
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
    var_result = ReinterpretCast<RawPtrT>(
        IntPtrAdd(BitcastTaggedToWord(code),
                  IntPtrConstant(Code::kHeaderSize - kHeapObjectTag)));
    Goto(&out);
  }

  BIND(&if_code_is_off_heap);
  {
    TNode<IntPtrT> builtin_entry_offset_from_isolate_root =
        IntPtrAdd(IntPtrConstant(IsolateData::builtin_entry_table_offset()),
                  ChangeInt32ToIntPtr(Word32Shl(
                      builtin_index, Int32Constant(kSystemPointerSizeLog2))));

    var_result = ReinterpretCast<RawPtrT>(
        Load(MachineType::Pointer(),
             ExternalConstant(ExternalReference::isolate_root(isolate())),
             builtin_entry_offset_from_isolate_root));
    Goto(&out);
  }

  BIND(&out);
  return var_result.value();
}

76 77 78
// -----------------------------------------------------------------------------
// ES6 section 21.2 RegExp Objects

79 80
TNode<JSRegExpResult> RegExpBuiltinsAssembler::AllocateRegExpResult(
    TNode<Context> context, TNode<Smi> length, TNode<Smi> index,
81
    TNode<String> input, TNode<JSRegExp> regexp, TNode<Number> last_index,
82
    TNode<FixedArray>* elements_out) {
83 84 85
  CSA_ASSERT(this, SmiLessThanOrEqual(
                       length, SmiConstant(JSArray::kMaxFastArrayLength)));
  CSA_ASSERT(this, SmiGreaterThan(length, SmiConstant(0)));
86

87
  // Allocate.
88 89

  const ElementsKind elements_kind = PACKED_ELEMENTS;
90 91
  TNode<Map> map = CAST(LoadContextElement(LoadNativeContext(context),
                                           Context::REGEXP_RESULT_MAP_INDEX));
92
  TNode<AllocationSite> no_allocation_site = {};
93
  TNode<IntPtrT> length_intptr = SmiUntag(length);
94

95 96 97 98 99 100
  // Note: The returned `elements` may be in young large object space, but
  // `array` is guaranteed to be in new space so we could skip write barriers
  // below.
  TNode<JSArray> array;
  TNode<FixedArrayBase> elements;
  std::tie(array, elements) = AllocateUninitializedJSArrayWithElements(
101
      elements_kind, map, length, no_allocation_site, length_intptr,
102
      kAllowLargeObjectAllocation, JSRegExpResult::kSize);
103

104
  // Finish result initialization.
105

106
  TNode<JSRegExpResult> result = UncheckedCast<JSRegExpResult>(array);
107

108 109 110 111
  // Load undefined value once here to avoid multiple LoadRoots.
  TNode<Oddball> undefined_value = UncheckedCast<Oddball>(
      CodeAssembler::LoadRoot(RootIndex::kUndefinedValue));

112
  StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kIndexOffset, index);
113 114
  // TODO(jgruber,tebbi): Could skip barrier but the MemoryOptimizer complains.
  StoreObjectField(result, JSRegExpResult::kInputOffset, input);
115
  StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kGroupsOffset,
116 117 118 119
                                 undefined_value);
  StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kNamesOffset,
                                 undefined_value);

120 121 122 123 124 125 126 127 128 129 130 131 132 133
  // Stash regexp in order to re-execute and build JSRegExpResultIndices lazily
  // when the 'indices' property is accessed.
  StoreObjectField(result, JSRegExpResult::kCachedIndicesOrRegexpOffset,
                   regexp);
  StoreObjectField(result, JSRegExpResult::kRegexpInputOffset, input);

  // If non-smi last_index then store an SmiZero instead.
  {
    TNode<Smi> last_index_smi = Select<Smi>(
        TaggedIsSmi(last_index), [=] { return CAST(last_index); },
        [=] { return SmiZero(); });
    StoreObjectField(result, JSRegExpResult::kRegexpLastIndexOffset,
                     last_index_smi);
  }
134

135
  // Finish elements initialization.
136

137
  FillFixedArrayWithValue(elements_kind, elements, IntPtrZero(), length_intptr,
138
                          RootIndex::kUndefinedValue);
139

140
  if (elements_out) *elements_out = CAST(elements);
141
  return result;
142 143
}

144
TNode<Object> RegExpBuiltinsAssembler::FastLoadLastIndexBeforeSmiCheck(
145
    TNode<JSRegExp> regexp) {
146 147
  // Load the in-object field.
  static const int field_offset =
148
      JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize;
149 150 151
  return LoadObjectField(regexp, field_offset);
}

152 153
TNode<Object> RegExpBuiltinsAssembler::SlowLoadLastIndex(TNode<Context> context,
                                                         TNode<Object> regexp) {
154 155 156 157 158
  return GetProperty(context, regexp, isolate()->factory()->lastIndex_string());
}

// The fast-path of StoreLastIndex when regexp is guaranteed to be an unmodified
// JSRegExp instance.
159 160
void RegExpBuiltinsAssembler::FastStoreLastIndex(TNode<JSRegExp> regexp,
                                                 TNode<Smi> value) {
161 162
  // Store the in-object field.
  static const int field_offset =
163
      JSRegExp::kHeaderSize + JSRegExp::kLastIndexFieldIndex * kTaggedSize;
164 165 166
  StoreObjectField(regexp, field_offset, value);
}

167 168
void RegExpBuiltinsAssembler::SlowStoreLastIndex(SloppyTNode<Context> context,
                                                 SloppyTNode<Object> regexp,
169
                                                 SloppyTNode<Object> value) {
170
  TNode<String> name = HeapConstant(isolate()->factory()->lastIndex_string());
171
  SetPropertyStrict(context, regexp, name, value);
172 173
}

174
TNode<JSRegExpResult> RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(
175 176 177
    TNode<Context> context, TNode<JSRegExp> regexp,
    TNode<RegExpMatchInfo> match_info, TNode<String> string,
    TNode<Number> last_index) {
178 179
  Label named_captures(this), out(this);

180
  TNode<IntPtrT> num_indices = SmiUntag(CAST(UnsafeLoadFixedArrayElement(
181 182
      match_info, RegExpMatchInfo::kNumberOfCapturesIndex)));
  TNode<Smi> num_results = SmiTag(WordShr(num_indices, 1));
183 184 185
  TNode<Smi> start = CAST(UnsafeLoadFixedArrayElement(
      match_info, RegExpMatchInfo::kFirstCaptureIndex));
  TNode<Smi> end = CAST(UnsafeLoadFixedArrayElement(
186
      match_info, RegExpMatchInfo::kFirstCaptureIndex + 1));
187 188 189

  // Calculate the substring of the first match before creating the result array
  // to avoid an unnecessary write barrier storing the first result.
190

191 192
  TNode<String> first =
      CAST(CallBuiltin(Builtins::kSubString, context, string, start, end));
193

194
  TNode<FixedArray> result_elements;
195 196 197
  TNode<JSRegExpResult> result =
      AllocateRegExpResult(context, num_results, start, string, regexp,
                           last_index, &result_elements);
198

199
  UnsafeStoreFixedArrayElement(result_elements, 0, first);
200 201 202 203 204

  // If no captures exist we can skip named capture handling as well.
  GotoIf(SmiEqual(num_results, SmiConstant(1)), &out);

  // Store all remaining captures.
205
  TNode<IntPtrT> limit = IntPtrAdd(
206 207
      IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), num_indices);

208 209 210
  TVARIABLE(IntPtrT, var_from_cursor,
            IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex + 2));
  TVARIABLE(IntPtrT, var_to_cursor, IntPtrConstant(1));
211

212
  Label loop(this, {&var_from_cursor, &var_to_cursor});
213 214

  Goto(&loop);
215
  BIND(&loop);
216
  {
217 218
    TNode<IntPtrT> from_cursor = var_from_cursor.value();
    TNode<IntPtrT> to_cursor = var_to_cursor.value();
219 220
    TNode<Smi> start =
        CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor));
221 222 223 224

    Label next_iter(this);
    GotoIf(SmiEqual(start, SmiConstant(-1)), &next_iter);

225 226
    TNode<IntPtrT> from_cursor_plus1 =
        IntPtrAdd(from_cursor, IntPtrConstant(1));
227 228
    TNode<Smi> end =
        CAST(UnsafeLoadFixedArrayElement(match_info, from_cursor_plus1));
229

230
    TNode<String> capture =
231
        CAST(CallBuiltin(Builtins::kSubString, context, string, start, end));
232
    UnsafeStoreFixedArrayElement(result_elements, to_cursor, capture);
233 234
    Goto(&next_iter);

235
    BIND(&next_iter);
236 237
    var_from_cursor = IntPtrAdd(from_cursor, IntPtrConstant(2));
    var_to_cursor = IntPtrAdd(to_cursor, IntPtrConstant(1));
238 239 240 241
    Branch(UintPtrLessThan(var_from_cursor.value(), limit), &loop,
           &named_captures);
  }

242
  BIND(&named_captures);
243
  {
244 245
    CSA_ASSERT(this, SmiGreaterThan(num_results, SmiConstant(1)));

246 247 248
    // Preparations for named capture properties. Exit early if the result does
    // not have any named captures to minimize performance impact.

249
    TNode<FixedArray> data =
250
        CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
251 252 253

    // We reach this point only if captures exist, implying that this is an
    // IRREGEXP JSRegExp.
254 255 256
    CSA_ASSERT(this,
               SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
                        SmiConstant(JSRegExp::IRREGEXP)));
257 258 259

    // The names fixed array associates names at even indices with a capture
    // index at odd indices.
260
    TNode<Object> maybe_names =
261
        LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureNameMapIndex);
262
    GotoIf(TaggedEqual(maybe_names, SmiZero()), &out);
263

264 265 266 267 268 269
    // One or more named captures exist, add a property for each one.

    TNode<FixedArray> names = CAST(maybe_names);
    TNode<IntPtrT> names_length = LoadAndUntagFixedArrayBaseLength(names);
    CSA_ASSERT(this, IntPtrGreaterThan(names_length, IntPtrZero()));

270 271 272
    // Stash names in case we need them to build the indices array later.
    StoreObjectField(result, JSRegExpResult::kNamesOffset, names);

273 274 275 276
    // Allocate a new object to store the named capture properties.
    // TODO(jgruber): Could be optimized by adding the object map to the heap
    // root list.

277
    TNode<IntPtrT> num_properties = WordSar(names_length, 1);
278
    TNode<NativeContext> native_context = LoadNativeContext(context);
279 280
    TNode<Map> map = CAST(LoadContextElement(
        native_context, Context::SLOW_OBJECT_WITH_NULL_PROTOTYPE_MAP));
281 282
    TNode<NameDictionary> properties =
        AllocateNameDictionary(num_properties, kAllowLargeObjectAllocation);
283

284
    TNode<JSObject> group_object = AllocateJSObjectFromMap(map, properties);
285
    StoreObjectField(result, JSRegExpResult::kGroupsOffset, group_object);
286

287
    TVARIABLE(IntPtrT, var_i, IntPtrZero());
288

289
    Label loop(this, &var_i);
290 291

    Goto(&loop);
292
    BIND(&loop);
293
    {
294 295 296
      TNode<IntPtrT> i = var_i.value();
      TNode<IntPtrT> i_plus_1 = IntPtrAdd(i, IntPtrConstant(1));
      TNode<IntPtrT> i_plus_2 = IntPtrAdd(i_plus_1, IntPtrConstant(1));
297

298 299 300 301
      TNode<String> name = CAST(LoadFixedArrayElement(names, i));
      TNode<Smi> index = CAST(LoadFixedArrayElement(names, i_plus_1));
      TNode<HeapObject> capture =
          CAST(LoadFixedArrayElement(result_elements, SmiUntag(index)));
302

303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
      // TODO(v8:8213): For maintainability, we should call a CSA/Torque
      // implementation of CreateDataProperty instead.

      // At this point the spec says to call CreateDataProperty. However, we can
      // skip most of the steps and go straight to adding a dictionary entry
      // because we know a bunch of useful facts:
      // - All keys are non-numeric internalized strings
      // - No keys repeat
      // - Receiver has no prototype
      // - Receiver isn't used as a prototype
      // - Receiver isn't any special object like a Promise intrinsic object
      // - Receiver is extensible
      // - Receiver has no interceptors
      Label add_dictionary_property_slow(this, Label::kDeferred);
      Add<NameDictionary>(properties, name, capture,
                          &add_dictionary_property_slow);
319

320
      var_i = i_plus_2;
321 322
      Branch(IntPtrGreaterThanOrEqual(var_i.value(), names_length), &out,
             &loop);
323 324 325 326 327 328

      BIND(&add_dictionary_property_slow);
      // If the dictionary needs resizing, the above Add call will jump here
      // before making any changes. This shouldn't happen because we allocated
      // the dictionary with enough space above.
      Unreachable();
329 330 331
    }
  }

332
  BIND(&out);
333 334 335 336
  return result;
}

void RegExpBuiltinsAssembler::GetStringPointers(
337 338 339 340
    TNode<RawPtrT> string_data, TNode<IntPtrT> offset,
    TNode<IntPtrT> last_index, TNode<IntPtrT> string_length,
    String::Encoding encoding, TVariable<RawPtrT>* var_string_start,
    TVariable<RawPtrT>* var_string_end) {
341 342 343 344 345 346 347
  DCHECK_EQ(var_string_start->rep(), MachineType::PointerRepresentation());
  DCHECK_EQ(var_string_end->rep(), MachineType::PointerRepresentation());

  const ElementsKind kind = (encoding == String::ONE_BYTE_ENCODING)
                                ? UINT8_ELEMENTS
                                : UINT16_ELEMENTS;

348 349 350 351
  TNode<IntPtrT> from_offset =
      ElementOffsetFromIndex(IntPtrAdd(offset, last_index), kind);
  *var_string_start =
      ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, from_offset));
352

353 354 355
  TNode<IntPtrT> to_offset =
      ElementOffsetFromIndex(IntPtrAdd(offset, string_length), kind);
  *var_string_end = ReinterpretCast<RawPtrT>(IntPtrAdd(string_data, to_offset));
356 357
}

358
TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
359
    TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string,
360
    TNode<Number> last_index, TNode<RegExpMatchInfo> match_info) {
361 362
  ToDirectStringAssembler to_direct(state(), string);

363
  TVARIABLE(HeapObject, var_result);
364
  Label out(this), atom(this), runtime(this, Label::kDeferred);
365 366

  // External constants.
367
  TNode<ExternalReference> isolate_address =
368
      ExternalConstant(ExternalReference::isolate_address(isolate()));
369 370
  TNode<ExternalReference> regexp_stack_memory_top_address = ExternalConstant(
      ExternalReference::address_of_regexp_stack_memory_top_address(isolate()));
371
  TNode<ExternalReference> static_offsets_vector_address = ExternalConstant(
372 373
      ExternalReference::address_of_static_offsets_vector(isolate()));

374 375 376 377 378 379
  // At this point, last_index is definitely a canonicalized non-negative
  // number, which implies that any non-Smi last_index is greater than
  // the maximal string length. If lastIndex > string.length then the matcher
  // must fail.

  Label if_failure(this);
380 381 382 383 384

  CSA_ASSERT(this, IsNumberNormalized(last_index));
  CSA_ASSERT(this, IsNumberPositive(last_index));
  GotoIf(TaggedIsNotSmi(last_index), &if_failure);

385 386
  TNode<IntPtrT> int_string_length = LoadStringLengthAsWord(string);
  TNode<IntPtrT> int_last_index = SmiUntag(CAST(last_index));
387 388

  GotoIf(UintPtrGreaterThan(int_last_index, int_string_length), &if_failure);
389

390 391
  // Since the RegExp has been compiled, data contains a fixed array.
  TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
392
  {
393
    // Dispatch on the type of the RegExp.
394
    {
395
      Label next(this), unreachable(this, Label::kDeferred);
396
      TNode<Int32T> tag = LoadAndUntagToWord32FixedArrayElement(
397 398 399
          data, IntPtrConstant(JSRegExp::kTagIndex));

      int32_t values[] = {
400 401 402
          JSRegExp::IRREGEXP,
          JSRegExp::ATOM,
          JSRegExp::NOT_COMPILED,
403 404 405 406 407 408 409 410 411
      };
      Label* labels[] = {&next, &atom, &runtime};

      STATIC_ASSERT(arraysize(values) == arraysize(labels));
      Switch(tag, &unreachable, values, labels, arraysize(values));

      BIND(&unreachable);
      Unreachable();

412 413
      BIND(&next);
    }
414 415 416

    // Check (number_of_captures + 1) * 2 <= offsets vector size
    // Or              number_of_captures <= offsets vector size / 2 - 1
417 418
    TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
        data, JSRegExp::kIrregexpCaptureCountIndex));
419

420 421 422
    const int kOffsetsSize = Isolate::kJSRegexpStaticOffsetsVectorSize;
    STATIC_ASSERT(kOffsetsSize >= 2);
    GotoIf(SmiAbove(capture_count, SmiConstant(kOffsetsSize / 2 - 1)),
423 424 425 426 427 428 429
           &runtime);
  }

  // Unpack the string if possible.

  to_direct.TryToDirect(&runtime);

430 431
  // Load the irregexp code or bytecode object and offsets into the subject
  // string. Both depend on whether the string is one- or two-byte.
432

433 434 435
  TVARIABLE(RawPtrT, var_string_start);
  TVARIABLE(RawPtrT, var_string_end);
  TVARIABLE(Object, var_code);
436
  TVARIABLE(Object, var_bytecode);
437 438

  {
439
    TNode<RawPtrT> direct_string_data = to_direct.PointerToData(&runtime);
440 441 442 443 444

    Label next(this), if_isonebyte(this), if_istwobyte(this, Label::kDeferred);
    Branch(IsOneByteStringInstanceType(to_direct.instance_type()),
           &if_isonebyte, &if_istwobyte);

445
    BIND(&if_isonebyte);
446 447 448 449
    {
      GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
                        int_string_length, String::ONE_BYTE_ENCODING,
                        &var_string_start, &var_string_end);
450
      var_code =
451
          UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpLatin1CodeIndex);
452 453
      var_bytecode = UnsafeLoadFixedArrayElement(
          data, JSRegExp::kIrregexpLatin1BytecodeIndex);
454 455 456
      Goto(&next);
    }

457
    BIND(&if_istwobyte);
458 459 460 461
    {
      GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
                        int_string_length, String::TWO_BYTE_ENCODING,
                        &var_string_start, &var_string_end);
462 463
      var_code =
          UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpUC16CodeIndex);
464 465
      var_bytecode = UnsafeLoadFixedArrayElement(
          data, JSRegExp::kIrregexpUC16BytecodeIndex);
466 467 468
      Goto(&next);
    }

469
    BIND(&next);
470 471 472
  }

  // Check that the irregexp code has been generated for the actual string
473 474
  // encoding. If it has, the field contains a code object; and otherwise it
  // contains the uninitialized sentinel as a smi.
475 476 477 478 479 480 481 482 483 484
#ifdef DEBUG
  {
    Label next(this);
    GotoIfNot(TaggedIsSmi(var_code.value()), &next);
    CSA_ASSERT(this, SmiEqual(CAST(var_code.value()),
                              SmiConstant(JSRegExp::kUninitializedValue)));
    Goto(&next);
    BIND(&next);
  }
#endif
485

486 487
  GotoIf(TaggedIsSmi(var_code.value()), &runtime);
  TNode<Code> code = CAST(var_code.value());
488

489
  Label if_success(this), if_exception(this, Label::kDeferred);
490 491 492
  {
    IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);

493 494 495 496 497 498 499 500 501 502 503
    // Set up args for the final call into generated Irregexp code.

    MachineType type_int32 = MachineType::Int32();
    MachineType type_tagged = MachineType::AnyTagged();
    MachineType type_ptr = MachineType::Pointer();

    // Result: A NativeRegExpMacroAssembler::Result return code.
    MachineType retval_type = type_int32;

    // Argument 0: Original subject string.
    MachineType arg0_type = type_tagged;
504
    TNode<String> arg0 = string;
505 506 507

    // Argument 1: Previous index.
    MachineType arg1_type = type_int32;
508
    TNode<Int32T> arg1 = TruncateIntPtrToInt32(int_last_index);
509

510 511
    // Argument 2: Start of string data. This argument is ignored in the
    // interpreter.
512
    MachineType arg2_type = type_ptr;
513
    TNode<RawPtrT> arg2 = var_string_start.value();
514

515 516
    // Argument 3: End of string data. This argument is ignored in the
    // interpreter.
517
    MachineType arg3_type = type_ptr;
518
    TNode<RawPtrT> arg3 = var_string_end.value();
519 520 521

    // Argument 4: static offsets vector buffer.
    MachineType arg4_type = type_ptr;
522
    TNode<ExternalReference> arg4 = static_offsets_vector_address;
523

524 525 526 527 528 529 530 531 532 533 534
    // Argument 5: Number of capture registers.
    // Setting this to the number of registers required to store all captures
    // forces global regexps to behave as non-global.
    TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
        data, JSRegExp::kIrregexpCaptureCountIndex));
    // capture_count is the number of captures without the match itself.
    // Required registers = (capture_count + 1) * 2.
    STATIC_ASSERT(Internals::IsValidSmi((JSRegExp::kMaxCaptures + 1) << 1));
    TNode<Smi> register_count =
        SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);

535
    MachineType arg5_type = type_int32;
536
    TNode<Int32T> arg5 = SmiToInt32(register_count);
537

538 539
    // Argument 6: Start (high end) of backtracking stack memory area. This
    // argument is ignored in the interpreter.
540 541
    TNode<RawPtrT> stack_top = UncheckedCast<RawPtrT>(
        Load(MachineType::Pointer(), regexp_stack_memory_top_address));
542 543

    MachineType arg6_type = type_ptr;
544
    TNode<RawPtrT> arg6 = stack_top;
545 546 547

    // Argument 7: Indicate that this is a direct call from JavaScript.
    MachineType arg7_type = type_int32;
548
    TNode<Int32T> arg7 = Int32Constant(RegExp::CallOrigin::kFromJs);
549 550 551

    // Argument 8: Pass current isolate address.
    MachineType arg8_type = type_ptr;
552
    TNode<ExternalReference> arg8 = isolate_address;
553

554 555 556 557 558 559
    // Argument 9: Regular expression object. This argument is ignored in native
    // irregexp code.
    MachineType arg9_type = type_tagged;
    TNode<JSRegExp> arg9 = regexp;

    TNode<RawPtrT> code_entry = LoadCodeObjectEntry(code);
560

561 562 563 564 565 566 567 568 569 570 571 572
    // AIX uses function descriptors on CFunction calls. code_entry in this case
    // may also point to a Regex interpreter entry trampoline which does not
    // have a function descriptor. This method is ineffective on other platforms
    // and is equivalent to CallCFunction.
    TNode<Int32T> result =
        UncheckedCast<Int32T>(CallCFunctionWithoutFunctionDescriptor(
            code_entry, retval_type, std::make_pair(arg0_type, arg0),
            std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2),
            std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4),
            std::make_pair(arg5_type, arg5), std::make_pair(arg6_type, arg6),
            std::make_pair(arg7_type, arg7), std::make_pair(arg8_type, arg8),
            std::make_pair(arg9_type, arg9)));
573 574

    // Check the result.
575 576
    // We expect exactly one result since we force the called regexp to behave
    // as non-global.
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592
    TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result);
    GotoIf(
        IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)),
        &if_success);
    GotoIf(
        IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)),
        &if_failure);
    GotoIf(IntPtrEqual(int_result,
                       IntPtrConstant(RegExp::kInternalRegExpException)),
           &if_exception);

    CSA_ASSERT(this, IntPtrEqual(int_result,
                                 IntPtrConstant(RegExp::kInternalRegExpRetry)));
    Goto(&runtime);
  }

593
  BIND(&if_success);
594 595 596 597
  {
    // Check that the last match info has space for the capture registers and
    // the additional information. Ensure no overflow in add.
    STATIC_ASSERT(FixedArray::kMaxLength < kMaxInt - FixedArray::kLengthOffset);
598
    TNode<Smi> available_slots =
599 600
        SmiSub(LoadFixedArrayBaseLength(match_info),
               SmiConstant(RegExpMatchInfo::kLastMatchOverhead));
601 602
    TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
        data, JSRegExp::kIrregexpCaptureCountIndex));
603
    // Calculate number of register_count = (capture_count + 1) * 2.
604
    TNode<Smi> register_count =
605 606 607 608
        SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
    GotoIf(SmiGreaterThan(register_count, available_slots), &runtime);

    // Fill match_info.
609 610 611 612 613 614 615
    UnsafeStoreFixedArrayElement(match_info,
                                 RegExpMatchInfo::kNumberOfCapturesIndex,
                                 register_count, SKIP_WRITE_BARRIER);
    UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
                                 string);
    UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
                                 string);
616 617 618

    // Fill match and capture offsets in match_info.
    {
619 620
      TNode<IntPtrT> limit_offset =
          ElementOffsetFromIndex(register_count, INT32_ELEMENTS, 0);
621

622
      TNode<IntPtrT> to_offset = ElementOffsetFromIndex(
623
          IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), PACKED_ELEMENTS,
624
          RegExpMatchInfo::kHeaderSize - kHeapObjectTag);
625
      TVARIABLE(IntPtrT, var_to_offset, to_offset);
626 627

      VariableList vars({&var_to_offset}, zone());
628
      BuildFastLoop<IntPtrT>(
629
          vars, IntPtrZero(), limit_offset,
630
          [&](TNode<IntPtrT> offset) {
631 632 633
            TNode<Int32T> value = UncheckedCast<Int32T>(Load(
                MachineType::Int32(), static_offsets_vector_address, offset));
            TNode<Smi> smi_value = SmiFromInt32(value);
634 635
            StoreNoWriteBarrier(MachineRepresentation::kTagged, match_info,
                                var_to_offset.value(), smi_value);
636
            Increment(&var_to_offset, kTaggedSize);
637
          },
638
          kInt32Size, IndexAdvanceMode::kPost);
639 640
    }

641
    var_result = match_info;
642 643 644
    Goto(&out);
  }

645
  BIND(&if_failure);
646
  {
647
    var_result = NullConstant();
648 649 650
    Goto(&out);
  }

651
  BIND(&if_exception);
652
  {
653 654
// A stack overflow was detected in RegExp code.
#ifdef DEBUG
655
    TNode<ExternalReference> pending_exception_address =
656 657
        ExternalConstant(ExternalReference::Create(
            IsolateAddressId::kPendingExceptionAddress, isolate()));
658 659 660
    CSA_ASSERT(this, IsTheHole(Load(MachineType::AnyTagged(),
                                    pending_exception_address)));
#endif  // DEBUG
661 662
    CallRuntime(Runtime::kThrowStackOverflow, context);
    Unreachable();
663 664
  }

665
  BIND(&runtime);
666
  {
667 668
    var_result = CAST(CallRuntime(Runtime::kRegExpExec, context, regexp, string,
                                  last_index, match_info));
669 670 671
    Goto(&out);
  }

672 673
  BIND(&atom);
  {
674 675
    // TODO(jgruber): A call with 4 args stresses register allocation, this
    // should probably just be inlined.
676 677
    var_result = CAST(CallBuiltin(Builtins::kRegExpExecAtom, context, regexp,
                                  string, last_index, match_info));
678 679 680
    Goto(&out);
  }

681
  BIND(&out);
682 683 684
  return var_result.value();
}

685 686
TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
    TNode<Context> context, TNode<Object> object, TNode<Map> map) {
687
  Label out(this);
688
  TVARIABLE(BoolT, var_result);
689

690
#ifdef V8_ENABLE_FORCE_SLOW_PATH
691
  var_result = Int32FalseConstant();
692 693 694
  GotoIfForceSlowPath(&out);
#endif

695 696
  const TNode<NativeContext> native_context = LoadNativeContext(context);
  const TNode<HeapObject> regexp_fun =
697
      CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
698
  const TNode<Object> initial_map =
699
      LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset);
700
  const TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);
701

702
  var_result = has_initialmap;
703 704 705 706
  GotoIfNot(has_initialmap, &out);

  // The smi check is required to omit ToLength(lastIndex) calls with possible
  // user-code execution on the fast path.
707
  TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
708
  var_result = TaggedIsPositiveSmi(last_index);
709 710
  Goto(&out);

711
  BIND(&out);
712
  return var_result.value();
713 714
}

715 716
TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(
    TNode<Context> context, TNode<Object> object) {
717
  CSA_ASSERT(this, TaggedIsNotSmi(object));
718
  return IsFastRegExpNoPrototype(context, object, LoadMap(CAST(object)));
719 720
}

721
void RegExpBuiltinsAssembler::BranchIfFastRegExp(
722 723 724 725
    TNode<Context> context, TNode<HeapObject> object, TNode<Map> map,
    PrototypeCheckAssembler::Flags prototype_check_flags,
    base::Optional<DescriptorIndexNameValue> additional_property_to_check,
    Label* if_isunmodified, Label* if_ismodified) {
726
  CSA_ASSERT(this, TaggedEqual(LoadMap(object), map));
727

728 729
  GotoIfForceSlowPath(if_ismodified);

730 731
  // This should only be needed for String.p.(split||matchAll), but we are
  // conservative here.
732 733 734 735
  // Note: we are using the current native context here, which may or may not
  // match the object's native context. That's fine: in case of a mismatch, we
  // will bail in the next step when comparing the object's map against the
  // current native context's initial regexp map.
736
  TNode<NativeContext> native_context = LoadNativeContext(context);
737
  GotoIf(IsRegExpSpeciesProtectorCellInvalid(native_context), if_ismodified);
738

739 740 741 742
  TNode<JSFunction> regexp_fun =
      CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
  TNode<Map> initial_map = CAST(
      LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset));
743
  TNode<BoolT> has_initialmap = TaggedEqual(map, initial_map);
744 745 746

  GotoIfNot(has_initialmap, if_ismodified);

747 748 749 750 751 752 753 754 755
  // The smi check is required to omit ToLength(lastIndex) calls with possible
  // user-code execution on the fast path.
  TNode<Object> last_index = FastLoadLastIndexBeforeSmiCheck(CAST(object));
  GotoIfNot(TaggedIsPositiveSmi(last_index), if_ismodified);

  // Verify the prototype.

  TNode<Map> initial_proto_initial_map = CAST(
      LoadContextElement(native_context, Context::REGEXP_PROTOTYPE_MAP_INDEX));
756

757
  DescriptorIndexNameValue properties_to_check[2];
758
  int property_count = 0;
759 760 761
  properties_to_check[property_count++] = DescriptorIndexNameValue{
      JSRegExp::kExecFunctionDescriptorIndex, RootIndex::kexec_string,
      Context::REGEXP_EXEC_FUNCTION_INDEX};
762 763 764 765
  if (additional_property_to_check) {
    properties_to_check[property_count++] = *additional_property_to_check;
  }

766 767 768
  PrototypeCheckAssembler prototype_check_assembler(
      state(), prototype_check_flags, native_context, initial_proto_initial_map,
      Vector<DescriptorIndexNameValue>(properties_to_check, property_count));
769

770 771 772
  TNode<HeapObject> prototype = LoadMapPrototype(map);
  prototype_check_assembler.CheckAndBranch(prototype, if_isunmodified,
                                           if_ismodified);
773 774
}

775 776 777 778 779 780 781 782 783 784 785 786 787
void RegExpBuiltinsAssembler::BranchIfFastRegExp_Strict(
    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
    Label* if_ismodified) {
  BranchIfFastRegExp(context, object, LoadMap(object),
                     PrototypeCheckAssembler::kCheckPrototypePropertyConstness,
                     base::nullopt, if_isunmodified, if_ismodified);
}

void RegExpBuiltinsAssembler::BranchIfFastRegExp_Permissive(
    TNode<Context> context, TNode<HeapObject> object, Label* if_isunmodified,
    Label* if_ismodified) {
  BranchIfFastRegExp(context, object, LoadMap(object),
                     PrototypeCheckAssembler::kCheckFull, base::nullopt,
788
                     if_isunmodified, if_ismodified);
789 790
}

791 792 793 794
void RegExpBuiltinsAssembler::BranchIfRegExpResult(const TNode<Context> context,
                                                   const TNode<Object> object,
                                                   Label* if_isunmodified,
                                                   Label* if_ismodified) {
795
  // Could be a Smi.
796
  const TNode<Map> map = LoadReceiverMap(object);
797

798 799
  const TNode<NativeContext> native_context = LoadNativeContext(context);
  const TNode<Object> initial_regexp_result_map =
800 801
      LoadContextElement(native_context, Context::REGEXP_RESULT_MAP_INDEX);

802
  Branch(TaggedEqual(map, initial_regexp_result_map), if_isunmodified,
803 804 805
         if_ismodified);
}

806 807
// Fast path stub for ATOM regexps. String matching is done by StringIndexOf,
// and {match_info} is updated on success.
808
// The slow path is implemented in RegExp::AtomExec.
809
TF_BUILTIN(RegExpExecAtom, RegExpBuiltinsAssembler) {
810 811 812 813 814
  TNode<JSRegExp> regexp = CAST(Parameter(Descriptor::kRegExp));
  TNode<String> subject_string = CAST(Parameter(Descriptor::kString));
  TNode<Smi> last_index = CAST(Parameter(Descriptor::kLastIndex));
  TNode<FixedArray> match_info = CAST(Parameter(Descriptor::kMatchInfo));
  TNode<Context> context = CAST(Parameter(Descriptor::kContext));
815 816 817

  CSA_ASSERT(this, TaggedIsPositiveSmi(last_index));

818
  TNode<FixedArray> data = CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
819 820 821 822
  CSA_ASSERT(
      this,
      SmiEqual(CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kTagIndex)),
               SmiConstant(JSRegExp::ATOM)));
823 824 825

  // Callers ensure that last_index is in-bounds.
  CSA_ASSERT(this,
826 827
             UintPtrLessThanOrEqual(SmiUntag(last_index),
                                    LoadStringLengthAsWord(subject_string)));
828

829
  const TNode<String> needle_string =
830
      CAST(UnsafeLoadFixedArrayElement(data, JSRegExp::kAtomPatternIndex));
831

832
  const TNode<Smi> match_from =
833 834
      CAST(CallBuiltin(Builtins::kStringIndexOf, context, subject_string,
                       needle_string, last_index));
835 836 837 838 839 840 841

  Label if_failure(this), if_success(this);
  Branch(SmiEqual(match_from, SmiConstant(-1)), &if_failure, &if_success);

  BIND(&if_success);
  {
    CSA_ASSERT(this, TaggedIsPositiveSmi(match_from));
842 843
    CSA_ASSERT(this, UintPtrLessThan(SmiUntag(match_from),
                                     LoadStringLengthAsWord(subject_string)));
844 845 846 847

    const int kNumRegisters = 2;
    STATIC_ASSERT(RegExpMatchInfo::kInitialCaptureIndices >= kNumRegisters);

848
    const TNode<Smi> match_to =
849
        SmiAdd(match_from, LoadStringLengthAsSmi(needle_string));
850

851 852 853 854 855 856 857 858 859 860 861 862 863
    UnsafeStoreFixedArrayElement(
        match_info, RegExpMatchInfo::kNumberOfCapturesIndex,
        SmiConstant(kNumRegisters), SKIP_WRITE_BARRIER);
    UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
                                 subject_string);
    UnsafeStoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
                                 subject_string);
    UnsafeStoreFixedArrayElement(match_info,
                                 RegExpMatchInfo::kFirstCaptureIndex,
                                 match_from, SKIP_WRITE_BARRIER);
    UnsafeStoreFixedArrayElement(match_info,
                                 RegExpMatchInfo::kFirstCaptureIndex + 1,
                                 match_to, SKIP_WRITE_BARRIER);
864 865 866 867 868 869 870 871

    Return(match_info);
  }

  BIND(&if_failure);
  Return(NullConstant());
}

872 873 874 875
TF_BUILTIN(RegExpExecInternal, RegExpBuiltinsAssembler) {
  TNode<JSRegExp> regexp = CAST(Parameter(Descriptor::kRegExp));
  TNode<String> string = CAST(Parameter(Descriptor::kString));
  TNode<Number> last_index = CAST(Parameter(Descriptor::kLastIndex));
876
  TNode<RegExpMatchInfo> match_info = CAST(Parameter(Descriptor::kMatchInfo));
877 878 879 880 881 882 883 884
  TNode<Context> context = CAST(Parameter(Descriptor::kContext));

  CSA_ASSERT(this, IsNumberNormalized(last_index));
  CSA_ASSERT(this, IsNumberPositive(last_index));

  Return(RegExpExecInternal(context, regexp, string, last_index, match_info));
}

885 886
TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
                                                   TNode<Object> regexp,
887
                                                   bool is_fastpath) {
888 889
  Isolate* isolate = this->isolate();

890
  const TNode<IntPtrT> int_one = IntPtrConstant(1);
891
  TVARIABLE(Uint32T, var_length, Uint32Constant(0));
892
  TVARIABLE(IntPtrT, var_flags);
893 894 895 896 897 898

  // First, count the number of characters we will need and check which flags
  // are set.

  if (is_fastpath) {
    // Refer to JSRegExp's flag property on the fast-path.
899
    CSA_ASSERT(this, IsJSRegExp(CAST(regexp)));
900
    const TNode<Smi> flags_smi =
901
        CAST(LoadObjectField(CAST(regexp), JSRegExp::kFlagsOffset));
902 903
    var_flags = SmiUntag(flags_smi);

904 905 906 907 908 909 910
#define CASE_FOR_FLAG(FLAG)                                        \
  do {                                                             \
    Label next(this);                                              \
    GotoIfNot(IsSetWord(var_flags.value(), FLAG), &next);          \
    var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \
    Goto(&next);                                                   \
    BIND(&next);                                                   \
911 912 913 914 915
  } while (false)

    CASE_FOR_FLAG(JSRegExp::kGlobal);
    CASE_FOR_FLAG(JSRegExp::kIgnoreCase);
    CASE_FOR_FLAG(JSRegExp::kMultiline);
916
    CASE_FOR_FLAG(JSRegExp::kDotAll);
917 918 919 920 921 922 923
    CASE_FOR_FLAG(JSRegExp::kUnicode);
    CASE_FOR_FLAG(JSRegExp::kSticky);
#undef CASE_FOR_FLAG
  } else {
    DCHECK(!is_fastpath);

    // Fall back to GetProperty stub on the slow-path.
924
    var_flags = IntPtrZero();
925 926 927 928

#define CASE_FOR_FLAG(NAME, FLAG)                                          \
  do {                                                                     \
    Label next(this);                                                      \
929
    const TNode<Object> flag = GetProperty(                                \
930 931 932
        context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \
    Label if_isflagset(this);                                              \
    BranchIfToBooleanIsTrue(flag, &if_isflagset, &next);                   \
933
    BIND(&if_isflagset);                                                   \
934
    var_length = Uint32Add(var_length.value(), Uint32Constant(1));         \
935
    var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG)));   \
936
    Goto(&next);                                                           \
937
    BIND(&next);                                                           \
938 939 940 941 942
  } while (false)

    CASE_FOR_FLAG("global", JSRegExp::kGlobal);
    CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase);
    CASE_FOR_FLAG("multiline", JSRegExp::kMultiline);
943
    CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll);
944 945 946 947 948 949 950 951 952
    CASE_FOR_FLAG("unicode", JSRegExp::kUnicode);
    CASE_FOR_FLAG("sticky", JSRegExp::kSticky);
#undef CASE_FOR_FLAG
  }

  // Allocate a string of the required length and fill it with the corresponding
  // char for each set flag.

  {
953
    const TNode<String> result = AllocateSeqOneByteString(var_length.value());
954

955 956
    TVARIABLE(IntPtrT, var_offset,
              IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));
957 958 959 960

#define CASE_FOR_FLAG(FLAG, CHAR)                              \
  do {                                                         \
    Label next(this);                                          \
961
    GotoIfNot(IsSetWord(var_flags.value(), FLAG), &next);      \
962
    const TNode<Int32T> value = Int32Constant(CHAR);           \
963 964
    StoreNoWriteBarrier(MachineRepresentation::kWord8, result, \
                        var_offset.value(), value);            \
965
    var_offset = IntPtrAdd(var_offset.value(), int_one);       \
966
    Goto(&next);                                               \
967
    BIND(&next);                                               \
968 969 970 971 972
  } while (false)

    CASE_FOR_FLAG(JSRegExp::kGlobal, 'g');
    CASE_FOR_FLAG(JSRegExp::kIgnoreCase, 'i');
    CASE_FOR_FLAG(JSRegExp::kMultiline, 'm');
973
    CASE_FOR_FLAG(JSRegExp::kDotAll, 's');
974 975 976 977 978 979 980 981 982 983
    CASE_FOR_FLAG(JSRegExp::kUnicode, 'u');
    CASE_FOR_FLAG(JSRegExp::kSticky, 'y');
#undef CASE_FOR_FLAG

    return result;
  }
}

// ES#sec-regexpinitialize
// Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
984 985 986
TNode<Object> RegExpBuiltinsAssembler::RegExpInitialize(
    const TNode<Context> context, const TNode<JSRegExp> regexp,
    const TNode<Object> maybe_pattern, const TNode<Object> maybe_flags) {
987
  // Normalize pattern.
988
  const TNode<Object> pattern = Select<Object>(
989
      IsUndefined(maybe_pattern), [=] { return EmptyStringConstant(); },
990
      [=] { return ToString_Inline(context, maybe_pattern); });
991 992

  // Normalize flags.
993
  const TNode<Object> flags = Select<Object>(
994
      IsUndefined(maybe_flags), [=] { return EmptyStringConstant(); },
995
      [=] { return ToString_Inline(context, maybe_flags); });
996 997 998 999 1000 1001 1002 1003 1004 1005

  // Initialize.

  return CallRuntime(Runtime::kRegExpInitializeAndCompile, context, regexp,
                     pattern, flags);
}

// ES#sec-regexp-pattern-flags
// RegExp ( pattern, flags )
TF_BUILTIN(RegExpConstructor, RegExpBuiltinsAssembler) {
1006 1007 1008 1009
  TNode<Object> pattern = CAST(Parameter(Descriptor::kPattern));
  TNode<Object> flags = CAST(Parameter(Descriptor::kFlags));
  TNode<Object> new_target = CAST(Parameter(Descriptor::kJSNewTarget));
  TNode<Context> context = CAST(Parameter(Descriptor::kContext));
1010 1011 1012

  Isolate* isolate = this->isolate();

1013 1014 1015
  TVARIABLE(Object, var_flags, flags);
  TVARIABLE(Object, var_pattern, pattern);
  TVARIABLE(Object, var_new_target, new_target);
1016

1017
  TNode<NativeContext> native_context = LoadNativeContext(context);
1018 1019
  TNode<JSFunction> regexp_function =
      CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
1020

1021
  TNode<BoolT> pattern_is_regexp = IsRegExp(context, pattern);
1022 1023 1024 1025 1026

  {
    Label next(this);

    GotoIfNot(IsUndefined(new_target), &next);
1027
    var_new_target = regexp_function;
1028 1029 1030 1031

    GotoIfNot(pattern_is_regexp, &next);
    GotoIfNot(IsUndefined(flags), &next);

1032
    TNode<Object> value =
1033 1034
        GetProperty(context, pattern, isolate->factory()->constructor_string());

1035
    GotoIfNot(TaggedEqual(value, regexp_function), &next);
1036 1037
    Return(pattern);

1038
    BIND(&next);
1039 1040 1041 1042 1043 1044 1045
  }

  {
    Label next(this), if_patternisfastregexp(this),
        if_patternisslowregexp(this);
    GotoIf(TaggedIsSmi(pattern), &next);

1046
    GotoIf(IsJSRegExp(CAST(pattern)), &if_patternisfastregexp);
1047 1048 1049

    Branch(pattern_is_regexp, &if_patternisslowregexp, &next);

1050
    BIND(&if_patternisfastregexp);
1051
    {
1052
      TNode<Object> source =
1053
          LoadObjectField(CAST(pattern), JSRegExp::kSourceOffset);
1054
      var_pattern = source;
1055 1056 1057 1058 1059

      {
        Label inner_next(this);
        GotoIfNot(IsUndefined(flags), &inner_next);

1060
        var_flags = FlagsGetter(context, pattern, true);
1061 1062
        Goto(&inner_next);

1063
        BIND(&inner_next);
1064 1065 1066 1067 1068
      }

      Goto(&next);
    }

1069
    BIND(&if_patternisslowregexp);
1070
    {
1071 1072
      var_pattern =
          GetProperty(context, pattern, isolate->factory()->source_string());
1073 1074 1075 1076 1077

      {
        Label inner_next(this);
        GotoIfNot(IsUndefined(flags), &inner_next);

1078
        var_flags =
1079 1080 1081
            GetProperty(context, pattern, isolate->factory()->flags_string());
        Goto(&inner_next);

1082
        BIND(&inner_next);
1083 1084 1085 1086 1087
      }

      Goto(&next);
    }

1088
    BIND(&next);
1089 1090 1091 1092
  }

  // Allocate.

1093
  TVARIABLE(JSRegExp, var_regexp);
1094 1095 1096
  {
    Label allocate_jsregexp(this), allocate_generic(this, Label::kDeferred),
        next(this);
1097
    Branch(TaggedEqual(var_new_target.value(), regexp_function),
1098 1099
           &allocate_jsregexp, &allocate_generic);

1100
    BIND(&allocate_jsregexp);
1101
    {
1102
      const TNode<Map> initial_map = CAST(LoadObjectField(
1103
          regexp_function, JSFunction::kPrototypeOrInitialMapOffset));
1104
      var_regexp = CAST(AllocateJSObjectFromMap(initial_map));
1105 1106 1107
      Goto(&next);
    }

1108
    BIND(&allocate_generic);
1109 1110
    {
      ConstructorBuiltinsAssembler constructor_assembler(this->state());
1111 1112
      var_regexp = CAST(constructor_assembler.EmitFastNewObject(
          context, regexp_function, CAST(var_new_target.value())));
1113 1114 1115
      Goto(&next);
    }

1116
    BIND(&next);
1117 1118
  }

1119 1120
  const TNode<Object> result = RegExpInitialize(
      context, var_regexp.value(), var_pattern.value(), var_flags.value());
1121 1122 1123 1124 1125 1126
  Return(result);
}

// ES#sec-regexp.prototype.compile
// RegExp.prototype.compile ( pattern, flags )
TF_BUILTIN(RegExpPrototypeCompile, RegExpBuiltinsAssembler) {
1127 1128 1129 1130
  TNode<Object> maybe_receiver = CAST(Parameter(Descriptor::kReceiver));
  TNode<Object> maybe_pattern = CAST(Parameter(Descriptor::kPattern));
  TNode<Object> maybe_flags = CAST(Parameter(Descriptor::kFlags));
  TNode<Context> context = CAST(Parameter(Descriptor::kContext));
1131

1132
  ThrowIfNotInstanceType(context, maybe_receiver, JS_REG_EXP_TYPE,
1133
                         "RegExp.prototype.compile");
1134
  const TNode<JSRegExp> receiver = CAST(maybe_receiver);
1135

1136 1137
  TVARIABLE(Object, var_flags, maybe_flags);
  TVARIABLE(Object, var_pattern, maybe_pattern);
1138 1139 1140 1141 1142 1143

  // Handle a JSRegExp pattern.
  {
    Label next(this);

    GotoIf(TaggedIsSmi(maybe_pattern), &next);
1144
    GotoIfNot(IsJSRegExp(CAST(maybe_pattern)), &next);
1145 1146 1147 1148 1149 1150

    // {maybe_flags} must be undefined in this case, otherwise throw.
    {
      Label next(this);
      GotoIf(IsUndefined(maybe_flags), &next);

1151
      ThrowTypeError(context, MessageTemplate::kRegExpFlags);
1152

1153
      BIND(&next);
1154 1155
    }

1156
    const TNode<JSRegExp> pattern = CAST(maybe_pattern);
1157 1158
    const TNode<String> new_flags = FlagsGetter(context, pattern, true);
    const TNode<Object> new_pattern =
1159
        LoadObjectField(pattern, JSRegExp::kSourceOffset);
1160

1161 1162
    var_flags = new_flags;
    var_pattern = new_pattern;
1163 1164

    Goto(&next);
1165
    BIND(&next);
1166 1167
  }

1168 1169
  const TNode<Object> result = RegExpInitialize(
      context, receiver, var_pattern.value(), var_flags.value());
1170 1171 1172 1173
  Return(result);
}

// Fast-path implementation for flag checks on an unmodified JSRegExp instance.
1174 1175
TNode<BoolT> RegExpBuiltinsAssembler::FastFlagGetter(TNode<JSRegExp> regexp,
                                                     JSRegExp::Flag flag) {
1176 1177
  TNode<Smi> flags = CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset));
  TNode<Smi> mask = SmiConstant(flag);
1178 1179 1180
  return ReinterpretCast<BoolT>(SmiToInt32(
      SmiShr(SmiAnd(flags, mask),
             base::bits::CountTrailingZeros(static_cast<int>(flag)))));
1181 1182 1183
}

// Load through the GetProperty stub.
1184 1185 1186
TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context,
                                                     TNode<Object> regexp,
                                                     JSRegExp::Flag flag) {
1187
  Label out(this);
1188
  TVARIABLE(BoolT, var_result);
1189 1190 1191 1192

  Handle<String> name;
  switch (flag) {
    case JSRegExp::kGlobal:
1193
      name = isolate()->factory()->global_string();
1194 1195
      break;
    case JSRegExp::kIgnoreCase:
1196
      name = isolate()->factory()->ignoreCase_string();
1197 1198
      break;
    case JSRegExp::kMultiline:
1199
      name = isolate()->factory()->multiline_string();
1200
      break;
1201 1202 1203
    case JSRegExp::kDotAll:
      UNREACHABLE();  // Never called for dotAll.
      break;
1204
    case JSRegExp::kSticky:
1205
      name = isolate()->factory()->sticky_string();
1206 1207
      break;
    case JSRegExp::kUnicode:
1208
      name = isolate()->factory()->unicode_string();
1209 1210 1211 1212 1213
      break;
    default:
      UNREACHABLE();
  }

1214
  TNode<Object> value = GetProperty(context, regexp, name);
1215 1216 1217 1218

  Label if_true(this), if_false(this);
  BranchIfToBooleanIsTrue(value, &if_true, &if_false);

1219
  BIND(&if_true);
1220
  var_result = BoolConstant(true);
1221
  Goto(&out);
1222

1223
  BIND(&if_false);
1224
  var_result = BoolConstant(false);
1225
  Goto(&out);
1226

1227
  BIND(&out);
1228 1229 1230
  return var_result.value();
}

1231 1232 1233 1234
TNode<BoolT> RegExpBuiltinsAssembler::FlagGetter(TNode<Context> context,
                                                 TNode<Object> regexp,
                                                 JSRegExp::Flag flag,
                                                 bool is_fastpath) {
1235
  return is_fastpath ? FastFlagGetter(CAST(regexp), flag)
1236 1237 1238
                     : SlowFlagGetter(context, regexp, flag);
}

1239 1240 1241
TNode<Number> RegExpBuiltinsAssembler::AdvanceStringIndex(
    SloppyTNode<String> string, SloppyTNode<Number> index,
    SloppyTNode<BoolT> is_unicode, bool is_fastpath) {
1242
  CSA_ASSERT(this, IsString(string));
1243
  CSA_ASSERT(this, IsNumberNormalized(index));
1244 1245
  if (is_fastpath) CSA_ASSERT(this, TaggedIsPositiveSmi(index));

1246
  // Default to last_index + 1.
1247 1248
  // TODO(pwong): Consider using TrySmiAdd for the fast path to reduce generated
  // code.
1249 1250
  TNode<Number> index_plus_one = NumberInc(index);
  TVARIABLE(Number, var_result, index_plus_one);
1251

1252 1253 1254
  // TODO(v8:9880): Given that we have to convert index from Number to UintPtrT
  // anyway, consider using UintPtrT index to simplify the code below.

1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270
  // Advancing the index has some subtle issues involving the distinction
  // between Smis and HeapNumbers. There's three cases:
  // * {index} is a Smi, {index_plus_one} is a Smi. The standard case.
  // * {index} is a Smi, {index_plus_one} overflows into a HeapNumber.
  //   In this case we can return the result early, because
  //   {index_plus_one} > {string}.length.
  // * {index} is a HeapNumber, {index_plus_one} is a HeapNumber. This can only
  //   occur when {index} is outside the Smi range since we normalize
  //   explicitly. Again we can return early.
  if (is_fastpath) {
    // Must be in Smi range on the fast path. We control the value of {index}
    // on all call-sites and can never exceed the length of the string.
    STATIC_ASSERT(String::kMaxLength + 2 < Smi::kMaxValue);
    CSA_ASSERT(this, TaggedIsPositiveSmi(index_plus_one));
  }

1271
  Label if_isunicode(this), out(this);
1272 1273 1274 1275
  GotoIfNot(is_unicode, &out);

  // Keep this unconditional (even on the fast path) just to be safe.
  Branch(TaggedIsPositiveSmi(index_plus_one), &if_isunicode, &out);
1276

1277
  BIND(&if_isunicode);
1278
  {
1279 1280 1281 1282
    TNode<UintPtrT> string_length = Unsigned(LoadStringLengthAsWord(string));
    TNode<UintPtrT> untagged_plus_one =
        Unsigned(SmiUntag(CAST(index_plus_one)));
    GotoIfNot(UintPtrLessThan(untagged_plus_one, string_length), &out);
1283

1284 1285
    TNode<Int32T> lead =
        StringCharCodeAt(string, Unsigned(SmiUntag(CAST(index))));
1286 1287 1288 1289
    GotoIfNot(Word32Equal(Word32And(lead, Int32Constant(0xFC00)),
                          Int32Constant(0xD800)),
              &out);

1290
    TNode<Int32T> trail = StringCharCodeAt(string, untagged_plus_one);
1291 1292 1293 1294 1295
    GotoIfNot(Word32Equal(Word32And(trail, Int32Constant(0xFC00)),
                          Int32Constant(0xDC00)),
              &out);

    // At a surrogate pair, return index + 2.
1296 1297
    TNode<Number> index_plus_two = NumberInc(index_plus_one);
    var_result = index_plus_two;
1298 1299 1300 1301

    Goto(&out);
  }

1302
  BIND(&out);
1303 1304 1305
  return var_result.value();
}

1306 1307 1308
// ES#sec-createregexpstringiterator
// CreateRegExpStringIterator ( R, S, global, fullUnicode )
TNode<Object> RegExpMatchAllAssembler::CreateRegExpStringIterator(
1309 1310
    TNode<NativeContext> native_context, TNode<Object> regexp,
    TNode<String> string, TNode<BoolT> global, TNode<BoolT> full_unicode) {
1311 1312 1313 1314 1315 1316 1317
  TNode<Map> map = CAST(LoadContextElement(
      native_context,
      Context::INITIAL_REGEXP_STRING_ITERATOR_PROTOTYPE_MAP_INDEX));

  // 4. Let iterator be ObjectCreate(%RegExpStringIteratorPrototype%, «
  // [[IteratingRegExp]], [[IteratedString]], [[Global]], [[Unicode]],
  // [[Done]] »).
1318
  TNode<HeapObject> iterator = Allocate(JSRegExpStringIterator::kHeaderSize);
1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332
  StoreMapNoWriteBarrier(iterator, map);
  StoreObjectFieldRoot(iterator,
                       JSRegExpStringIterator::kPropertiesOrHashOffset,
                       RootIndex::kEmptyFixedArray);
  StoreObjectFieldRoot(iterator, JSRegExpStringIterator::kElementsOffset,
                       RootIndex::kEmptyFixedArray);

  // 5. Set iterator.[[IteratingRegExp]] to R.
  StoreObjectFieldNoWriteBarrier(
      iterator, JSRegExpStringIterator::kIteratingRegExpOffset, regexp);

  // 6. Set iterator.[[IteratedString]] to S.
  StoreObjectFieldNoWriteBarrier(
      iterator, JSRegExpStringIterator::kIteratedStringOffset, string);
1333

1334 1335 1336
  // 7. Set iterator.[[Global]] to global.
  // 8. Set iterator.[[Unicode]] to fullUnicode.
  // 9. Set iterator.[[Done]] to false.
1337
  TNode<Int32T> global_flag =
1338 1339 1340 1341 1342
      Word32Shl(ReinterpretCast<Int32T>(global),
                Int32Constant(JSRegExpStringIterator::kGlobalBit));
  TNode<Int32T> unicode_flag =
      Word32Shl(ReinterpretCast<Int32T>(full_unicode),
                Int32Constant(JSRegExpStringIterator::kUnicodeBit));
1343
  TNode<Int32T> iterator_flags = Word32Or(global_flag, unicode_flag);
1344
  StoreObjectFieldNoWriteBarrier(iterator, JSRegExpStringIterator::kFlagsOffset,
1345
                                 SmiFromInt32(iterator_flags));
1346 1347

  return iterator;
1348 1349
}

1350 1351
// Generates the fast path for @@split. {regexp} is an unmodified, non-sticky
// JSRegExp, {string} is a String, and {limit} is a Smi.
1352 1353
TNode<JSArray> RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(
    TNode<Context> context, TNode<JSRegExp> regexp, TNode<String> string,
1354
    const TNode<Smi> limit) {
1355 1356
  CSA_ASSERT(this, IsFastRegExpPermissive(context, regexp));
  CSA_ASSERT(this, Word32BinaryNot(FastFlagGetter(regexp, JSRegExp::kSticky)));
1357

1358
  const TNode<IntPtrT> int_limit = SmiUntag(limit);
1359

1360
  const ElementsKind kind = PACKED_ELEMENTS;
1361 1362
  const ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;

1363
  TNode<AllocationSite> allocation_site = {};
1364
  const TNode<NativeContext> native_context = LoadNativeContext(context);
1365
  TNode<Map> array_map = LoadJSArrayElementsMap(kind, native_context);
1366 1367

  Label return_empty_array(this, Label::kDeferred);
1368 1369
  TVARIABLE(JSArray, var_result);
  Label done(this);
1370 1371 1372 1373

  // If limit is zero, return an empty array.
  {
    Label next(this), if_limitiszero(this, Label::kDeferred);
1374
    Branch(SmiEqual(limit, SmiZero()), &return_empty_array, &next);
1375
    BIND(&next);
1376 1377
  }

1378
  const TNode<Smi> string_length = LoadStringLengthAsSmi(string);
1379 1380 1381 1382 1383

  // If passed the empty {string}, return either an empty array or a singleton
  // array depending on whether the {regexp} matches.
  {
    Label next(this), if_stringisempty(this, Label::kDeferred);
1384
    Branch(SmiEqual(string_length, SmiZero()), &if_stringisempty, &next);
1385

1386
    BIND(&if_stringisempty);
1387
    {
1388
      const TNode<Object> last_match_info = LoadContextElement(
1389 1390
          native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);

1391
      const TNode<Object> match_indices =
1392
          CallBuiltin(Builtins::kRegExpExecInternal, context, regexp, string,
1393
                      SmiZero(), last_match_info);
1394 1395

      Label return_singleton_array(this);
1396
      Branch(IsNull(match_indices), &return_singleton_array,
1397 1398
             &return_empty_array);

1399
      BIND(&return_singleton_array);
1400
      {
1401 1402
        TNode<Smi> length = SmiConstant(1);
        TNode<IntPtrT> capacity = IntPtrConstant(1);
1403 1404
        var_result =
            AllocateJSArray(kind, array_map, capacity, length, allocation_site);
1405

1406
        TNode<FixedArray> fixed_array = CAST(LoadElements(var_result.value()));
1407
        UnsafeStoreFixedArrayElement(fixed_array, 0, string);
1408

1409
        Goto(&done);
1410 1411 1412
      }
    }

1413
    BIND(&next);
1414 1415 1416 1417
  }

  // Loop preparations.

1418
  GrowableFixedArray array(state());
1419

1420 1421
  TVARIABLE(Smi, var_last_matched_until, SmiZero());
  TVARIABLE(Smi, var_next_search_from, SmiZero());
1422

1423 1424 1425
  Label loop(this, {array.var_array(), array.var_length(), array.var_capacity(),
                    &var_last_matched_until, &var_next_search_from}),
      push_suffix_and_out(this), out(this);
1426 1427
  Goto(&loop);

1428
  BIND(&loop);
1429
  {
1430 1431
    const TNode<Smi> next_search_from = var_next_search_from.value();
    const TNode<Smi> last_matched_until = var_last_matched_until.value();
1432

1433 1434 1435 1436 1437
    // We're done if we've reached the end of the string.
    {
      Label next(this);
      Branch(SmiEqual(next_search_from, string_length), &push_suffix_and_out,
             &next);
1438
      BIND(&next);
1439 1440 1441 1442
    }

    // Search for the given {regexp}.

1443
    const TNode<Object> last_match_info = LoadContextElement(
1444 1445
        native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);

1446
    const TNode<HeapObject> match_indices_ho =
1447 1448
        CAST(CallBuiltin(Builtins::kRegExpExecInternal, context, regexp, string,
                         next_search_from, last_match_info));
1449 1450 1451 1452

    // We're done if no match was found.
    {
      Label next(this);
1453
      Branch(IsNull(match_indices_ho), &push_suffix_and_out, &next);
1454
      BIND(&next);
1455 1456
    }

1457
    TNode<FixedArray> match_indices = CAST(match_indices_ho);
1458
    const TNode<Smi> match_from = CAST(UnsafeLoadFixedArrayElement(
1459
        match_indices, RegExpMatchInfo::kFirstCaptureIndex));
1460 1461 1462 1463

    // We're done if the match starts beyond the string.
    {
      Label next(this);
1464
      Branch(SmiEqual(match_from, string_length), &push_suffix_and_out, &next);
1465
      BIND(&next);
1466 1467
    }

1468
    const TNode<Smi> match_to = CAST(UnsafeLoadFixedArrayElement(
1469
        match_indices, RegExpMatchInfo::kFirstCaptureIndex + 1));
1470 1471 1472 1473 1474 1475 1476 1477

    // Advance index and continue if the match is empty.
    {
      Label next(this);

      GotoIfNot(SmiEqual(match_to, next_search_from), &next);
      GotoIfNot(SmiEqual(match_to, last_matched_until), &next);

1478
      const TNode<BoolT> is_unicode =
1479
          FastFlagGetter(regexp, JSRegExp::kUnicode);
1480
      const TNode<Number> new_next_search_from =
1481
          AdvanceStringIndex(string, next_search_from, is_unicode, true);
1482
      var_next_search_from = CAST(new_next_search_from);
1483 1484
      Goto(&loop);

1485
      BIND(&next);
1486 1487 1488 1489
    }

    // A valid match was found, add the new substring to the array.
    {
1490 1491
      const TNode<Smi> from = last_matched_until;
      const TNode<Smi> to = match_from;
1492
      array.Push(CallBuiltin(Builtins::kSubString, context, string, from, to));
1493 1494 1495 1496 1497
      GotoIf(WordEqual(array.length(), int_limit), &out);
    }

    // Add all captures to the array.
    {
1498
      const TNode<Smi> num_registers = CAST(LoadFixedArrayElement(
1499
          match_indices, RegExpMatchInfo::kNumberOfCapturesIndex));
1500
      const TNode<IntPtrT> int_num_registers = SmiUntag(num_registers);
1501

1502
      TVARIABLE(IntPtrT, var_reg, IntPtrConstant(2));
1503

1504 1505 1506
      Label nested_loop(this, {array.var_array(), array.var_length(),
                               array.var_capacity(), &var_reg}),
          nested_loop_out(this);
1507 1508 1509
      Branch(IntPtrLessThan(var_reg.value(), int_num_registers), &nested_loop,
             &nested_loop_out);

1510
      BIND(&nested_loop);
1511
      {
1512
        const TNode<IntPtrT> reg = var_reg.value();
1513
        const TNode<Object> from = LoadFixedArrayElement(
1514
            match_indices, reg,
1515
            RegExpMatchInfo::kFirstCaptureIndex * kTaggedSize, mode);
1516
        const TNode<Smi> to = CAST(LoadFixedArrayElement(
1517
            match_indices, reg,
1518
            (RegExpMatchInfo::kFirstCaptureIndex + 1) * kTaggedSize, mode));
1519 1520

        Label select_capture(this), select_undefined(this), store_value(this);
1521
        TVARIABLE(Object, var_value);
1522 1523 1524
        Branch(SmiEqual(to, SmiConstant(-1)), &select_undefined,
               &select_capture);

1525
        BIND(&select_capture);
1526
        {
1527 1528
          var_value =
              CallBuiltin(Builtins::kSubString, context, string, from, to);
1529 1530 1531
          Goto(&store_value);
        }

1532
        BIND(&select_undefined);
1533
        {
1534
          var_value = UndefinedConstant();
1535 1536 1537
          Goto(&store_value);
        }

1538
        BIND(&store_value);
1539
        {
1540
          array.Push(var_value.value());
1541 1542
          GotoIf(WordEqual(array.length(), int_limit), &out);

1543 1544
          const TNode<IntPtrT> new_reg = IntPtrAdd(reg, IntPtrConstant(2));
          var_reg = new_reg;
1545 1546 1547 1548 1549 1550

          Branch(IntPtrLessThan(new_reg, int_num_registers), &nested_loop,
                 &nested_loop_out);
        }
      }

1551
      BIND(&nested_loop_out);
1552 1553
    }

1554 1555
    var_last_matched_until = match_to;
    var_next_search_from = match_to;
1556 1557 1558
    Goto(&loop);
  }

1559
  BIND(&push_suffix_and_out);
1560
  {
1561 1562
    const TNode<Smi> from = var_last_matched_until.value();
    const TNode<Smi> to = string_length;
1563
    array.Push(CallBuiltin(Builtins::kSubString, context, string, from, to));
1564 1565 1566
    Goto(&out);
  }

1567
  BIND(&out);
1568
  {
1569 1570
    var_result = array.ToJSArray(context);
    Goto(&done);
1571 1572
  }

1573
  BIND(&return_empty_array);
1574
  {
1575 1576
    TNode<Smi> length = SmiZero();
    TNode<IntPtrT> capacity = IntPtrZero();
1577 1578
    var_result =
        AllocateJSArray(kind, array_map, capacity, length, allocation_site);
1579
    Goto(&done);
1580 1581
  }

1582 1583
  BIND(&done);
  return var_result.value();
1584 1585 1586 1587
}

}  // namespace internal
}  // namespace v8