// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include 'src/builtins/builtins-regexp-gen.h'

namespace regexp {

// Torque-visible names for C++ constants declared on JSRegExp.
//
// Type tag value that identifies an ATOM regexp; compared against the tag
// read from the regexp's data array in RegExpPrototypeMatchBody.
const kATOM: constexpr int31
    generates 'JSRegExp::ATOM';
// Index of the type tag within the regexp's data FixedArray.
const kTagIndex: constexpr int31
    generates 'JSRegExp::kTagIndex';
// Index within the data FixedArray from which the pattern string of an ATOM
// regexp is read.
const kAtomPatternIndex: constexpr int31
    generates 'JSRegExp::kAtomPatternIndex';

// Flag accessor implemented in C++ on RegExpBuiltinsAssembler. In this file
// it is called as FlagGetter(regexp, flag, isFastPath) and its bool result is
// used as "flag is set". The precise fast/slow lookup strategy lives in the
// C++ implementation and is not visible here.
extern transitioning macro RegExpBuiltinsAssembler::FlagGetter(
    implicit context: Context)(Object, constexpr Flag, constexpr bool): bool;

// Loads the element at a constexpr index from a RegExpMatchInfo. Used below to
// read the start/end indices of the whole match.
// NOTE(review): "Unsafe" presumably means no bounds check is performed;
// confirm against the C++ definition.
extern macro UnsafeLoadFixedArrayElement(
    RegExpMatchInfo, constexpr int31): Object;

// Shared implementation of RegExp.prototype[@@match]; |isFastPath| selects the
// fast or slow variant at compile time.
//
// - On the fast path |regexp| must be a FastJSRegExp (asserted).
// - If the global flag is not set, returns the result of a single exec:
//   RegExpPrototypeExecBodyFast on the fast path, RegExpExec otherwise.
// - If the global flag is set, stores 0 into lastIndex, then executes the
//   regexp in a loop, pushing each matched string into a growable array.
//   When an exec does not match, returns Null if no match was collected and
//   the collected matches as a JSArray otherwise.
// - After an empty match, lastIndex is moved forward with AdvanceStringIndex
//   (taking the unicode flag into account) before the next iteration.
transitioning macro RegExpPrototypeMatchBody(implicit context: Context)(
    regexp: JSReceiver, string: String, isFastPath: constexpr bool): JSAny {
  if constexpr (isFastPath) {
    assert(Is<FastJSRegExp>(regexp));
  }

  const isGlobal: bool = FlagGetter(regexp, Flag::kGlobal, isFastPath);

  // Non-global: a single exec is the whole result.
  if (!isGlobal) {
    return isFastPath ? RegExpPrototypeExecBodyFast(regexp, string) :
                        RegExpExec(regexp, string);
  }

  assert(isGlobal);
  const isUnicode: bool = FlagGetter(regexp, Flag::kUnicode, isFastPath);

  StoreLastIndex(regexp, 0, isFastPath);

  // Allocate an array to store the resulting match strings.
  let array = growable_fixed_array::NewGrowableFixedArray();

  // Check if the regexp is an ATOM type. If so, then keep the literal string
  // to search for so that we can avoid calling substring in the loop below.
  let atom: bool = false;
  let searchString: String = EmptyStringConstant();
  if constexpr (isFastPath) {
    const maybeAtomRegexp = UnsafeCast<JSRegExp>(regexp);
    const data = UnsafeCast<FixedArray>(maybeAtomRegexp.data);
    if (UnsafeCast<Smi>(data.objects[kTagIndex]) == kATOM) {
      searchString = UnsafeCast<String>(data.objects[kAtomPatternIndex]);
      atom = true;
    }
  }

  // Each iteration performs one exec. The loop is only left through the
  // return in IfDidNotMatch.
  while (true) {
    let match: String = EmptyStringConstant();
    try {
      if constexpr (isFastPath) {
        // On the fast path, grab the matching string from the raw match index
        // array.
        const matchIndices: RegExpMatchInfo =
            RegExpPrototypeExecBodyWithoutResultFast(
                UnsafeCast<JSRegExp>(regexp), string) otherwise IfDidNotMatch;
        if (atom) {
          // For an ATOM regexp the pattern string itself is used as the match.
          match = searchString;
        } else {
          const matchFrom = UnsafeLoadFixedArrayElement(
              matchIndices, kRegExpMatchInfoFirstCaptureIndex);
          const matchTo = UnsafeLoadFixedArrayElement(
              matchIndices, kRegExpMatchInfoFirstCaptureIndex + 1);
          match = SubString(
              string, UnsafeCast<Smi>(matchFrom), UnsafeCast<Smi>(matchTo));
        }
      } else {
        assert(!isFastPath);
        const resultTemp = RegExpExec(regexp, string);
        if (resultTemp == Null) {
          goto IfDidNotMatch;
        }
        // The matched string is element 0 of the exec result, converted to a
        // String.
        match = ToString_Inline(GetProperty(resultTemp, SmiConstant(0)));
      }
      goto IfDidMatch;
    } label IfDidNotMatch {
      return array.length == 0 ? Null : array.ToJSArray();
    } label IfDidMatch {
      // Store the match, growing the fixed array if needed.
      array.Push(match);

      // Advance last index if the match is the empty string.
      const matchLength: Smi = match.length_smi;
      if (matchLength != 0) {
        continue;
      }
      let lastIndex = LoadLastIndex(regexp, isFastPath);
      if constexpr (isFastPath) {
        assert(TaggedIsPositiveSmi(lastIndex));
      } else {
        lastIndex = ToLength_Inline(lastIndex);
      }

      const newLastIndex: Number = AdvanceStringIndex(
          string, UnsafeCast<Number>(lastIndex), isUnicode, isFastPath);

      if constexpr (isFastPath) {
        // On the fast path, we can be certain that lastIndex can never be
        // incremented to overflow the Smi range since the maximal string
        // length is less than the maximal Smi value.
        StaticAssertStringLengthFitsSmi();
        assert(TaggedIsPositiveSmi(newLastIndex));
      }

      StoreLastIndex(regexp, newLastIndex, isFastPath);
    }
  }

  VerifiedUnreachable();
}
// Fast-path entry: instantiates RegExpPrototypeMatchBody with
// isFastPath = true for a receiver already typed as FastJSRegExp.
transitioning macro FastRegExpPrototypeMatchBody(implicit context: Context)(
    receiver: FastJSRegExp, string: String): JSAny {
  const matchResult: JSAny = RegExpPrototypeMatchBody(receiver, string, true);
  return matchResult;
}
// Slow-path entry: instantiates RegExpPrototypeMatchBody with
// isFastPath = false for an arbitrary JSReceiver.
transitioning macro SlowRegExpPrototypeMatchBody(implicit context: Context)(
    receiver: JSReceiver, string: String): JSAny {
  const matchResult: JSAny = RegExpPrototypeMatchBody(receiver, string, false);
  return matchResult;
}

// Helper that skips a few initial checks and assumes:
// 1) receiver is a "fast" RegExp
// 2) pattern is a string
transitioning builtin RegExpMatchFast(implicit context: Context)(
    receiver: FastJSRegExp, string: String): JSAny {
  // Both arguments are already typed, so go straight to the fast-path body.
  const matchResult: JSAny = FastRegExpPrototypeMatchBody(receiver, string);
  return matchResult;
}

// ES#sec-regexp.prototype-@@match
// RegExp.prototype [ @@match ] ( string )
//
// JavaScript-visible entry point. Throws kIncompatibleMethodReceiver unless
// the receiver is a JSReceiver, converts the argument to a String, and then
// dispatches to the fast builtin when the receiver casts to FastJSRegExp, or
// to the slow-path body otherwise.
transitioning javascript builtin RegExpPrototypeMatch(
    js-implicit context: NativeContext, receiver: JSAny)(string: JSAny): JSAny {
  ThrowIfNotJSReceiver(
      receiver, MessageTemplate::kIncompatibleMethodReceiver,
      'RegExp.prototype.@@match');
  const regexpReceiver = UnsafeCast<JSReceiver>(receiver);
  const subject: String = ToString_Inline(string);

  try {
    // Strict: Reads global and unicode properties.
    // TODO(jgruber): Handle slow flag accesses on the fast path and make this
    // permissive.
    const fastRegExp = Cast<FastJSRegExp>(regexpReceiver) otherwise SlowPath;

    // TODO(pwong): Could be optimized to remove the overhead of calling the
    //              builtin (at the cost of a larger builtin).
    return RegExpMatchFast(fastRegExp, subject);
  } label SlowPath {
    return SlowRegExpPrototypeMatchBody(regexpReceiver, subject);
  }
}
}