regexp-utils.cc 7.87 KB
Newer Older
1 2 3 4 5 6
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/regexp/regexp-utils.h"

7
#include "src/heap/factory.h"
8 9
#include "src/isolate.h"
#include "src/objects-inl.h"
10
#include "src/objects/js-regexp-inl.h"
11 12 13 14 15
#include "src/regexp/jsregexp.h"

namespace v8 {
namespace internal {

16 17 18
// Returns the substring of the last subject that corresponds to capture
// number |capture| as recorded in |match_info|. Returns the empty string if
// the capture's registers lie beyond the recorded capture registers or if
// either recorded position is -1. When |ok| is non-null, it is set to true
// iff a substring was produced from a recorded capture.
Handle<String> RegExpUtils::GenericCaptureGetter(
    Isolate* isolate, Handle<RegExpMatchInfo> match_info, int capture,
    bool* ok) {
  // A capture occupies two consecutive registers: its start, then its end.
  const int start_register = capture * 2;
  bool found = start_register < match_info->NumberOfCaptureRegisters();

  int from = -1;
  int to = -1;
  if (found) {
    from = match_info->Capture(start_register);
    to = match_info->Capture(start_register + 1);
    found = from != -1 && to != -1;
  }

  if (ok != nullptr) *ok = found;
  if (!found) return isolate->factory()->empty_string();

  Handle<String> subject(match_info->LastSubject(), isolate);
  return isolate->factory()->NewSubString(subject, from, to);
}

namespace {

// Returns true iff |recv|'s map is the initial map of the isolate's RegExp
// function.
V8_INLINE bool HasInitialRegExpMap(Isolate* isolate, Handle<JSReceiver> recv) {
  const auto initial_regexp_map = isolate->regexp_function()->initial_map();
  return recv->map() == initial_regexp_map;
}

}  // namespace

// Stores |value| as the lastIndex of |recv|.
//
// If |recv| still has the initial RegExp map, the value is written straight
// into the JSRegExp's last_index field and |recv| is returned. Otherwise the
// store goes through a strict-mode Object::SetProperty on the "lastIndex"
// property and that call's result (possibly empty) is returned.
MaybeHandle<Object> RegExpUtils::SetLastIndex(Isolate* isolate,
                                              Handle<JSReceiver> recv,
                                              uint64_t value) {
  Handle<Object> value_as_object =
      isolate->factory()->NewNumberFromInt64(static_cast<int64_t>(value));
  if (HasInitialRegExpMap(isolate, recv)) {
    // |value_as_object| is only a Smi for small values; larger values (e.g.
    // those derived from ToLength(lastIndex) in SetAdvancedStringIndex) are
    // heap-allocated numbers. Skipping the write barrier is only safe for
    // Smis, so the store must use the regular barrier.
    JSRegExp::cast(*recv)->set_last_index(*value_as_object,
                                          UPDATE_WRITE_BARRIER);
    return recv;
  } else {
    return Object::SetProperty(isolate, recv,
                               isolate->factory()->lastIndex_string(),
                               value_as_object, LanguageMode::kStrict);
  }
}

// Reads the lastIndex of |recv|. Receivers that still have the initial RegExp
// map are read directly from the JSRegExp's last_index field; everything else
// goes through a generic "lastIndex" property load.
MaybeHandle<Object> RegExpUtils::GetLastIndex(Isolate* isolate,
                                              Handle<JSReceiver> recv) {
  if (!HasInitialRegExpMap(isolate, recv)) {
    return Object::GetProperty(isolate, recv,
                               isolate->factory()->lastIndex_string());
  }
  return handle(JSRegExp::cast(*recv)->last_index(), isolate);
}

// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
//
// |exec| is optional: callers that have already loaded the "exec" property
// may pass it in; if it is undefined it is loaded from |regexp| here.
//
// A callable exec is invoked with |string| and must produce a JSReceiver or
// null, otherwise a TypeError is thrown. If exec is not callable, |regexp|
// must be a JSRegExp (TypeError otherwise) and the isolate's regexp exec
// function is called instead.
MaybeHandle<Object> RegExpUtils::RegExpExec(Isolate* isolate,
                                            Handle<JSReceiver> regexp,
                                            Handle<String> string,
                                            Handle<Object> exec) {
  if (exec->IsUndefined(isolate)) {
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, exec,
        Object::GetProperty(isolate, regexp, isolate->factory()->exec_string()),
        Object);
  }

  if (!exec->IsCallable()) {
    // No user-visible exec method to call: only genuine JSRegExp objects may
    // fall back to the isolate's regexp exec function.
    if (!regexp->IsJSRegExp()) {
      THROW_NEW_ERROR(isolate,
                      NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
                                   isolate->factory()->NewStringFromAsciiChecked(
                                       "RegExp.prototype.exec"),
                                   regexp),
                      Object);
    }

    Handle<JSFunction> builtin_exec = isolate->regexp_exec_function();

    const int builtin_argc = 1;
    ScopedVector<Handle<Object>> builtin_args(builtin_argc);
    builtin_args[0] = string;

    return Execution::Call(isolate, builtin_exec, regexp, builtin_argc,
                           builtin_args.start());
  }

  const int call_argc = 1;
  ScopedVector<Handle<Object>> call_args(call_argc);
  call_args[0] = string;

  Handle<Object> exec_result;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, exec_result,
      Execution::Call(isolate, exec, regexp, call_argc, call_args.start()),
      Object);

  // The result of a user-provided exec must be an object or null.
  const bool is_valid_result =
      exec_result->IsJSReceiver() || exec_result->IsNull(isolate);
  if (!is_valid_result) {
    THROW_NEW_ERROR(isolate,
                    NewTypeError(MessageTemplate::kInvalidRegExpExecResult),
                    Object);
  }
  return exec_result;
}

// Determines whether |object| should be treated as a regexp.
//
// Non-receivers are never regexps. For receivers, the property keyed by the
// match symbol is loaded: if it is not undefined, its boolean value is the
// answer; otherwise the answer is whether |object| is a JSRegExp. Returns
// Nothing if the property load fails. A use counter is bumped whenever the
// match-symbol answer disagrees with the JSRegExp type check.
Maybe<bool> RegExpUtils::IsRegExp(Isolate* isolate, Handle<Object> object) {
  if (!object->IsJSReceiver()) return Just(false);

  Handle<JSReceiver> as_receiver = Handle<JSReceiver>::cast(object);

  Handle<Object> match_value;
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, match_value,
      JSObject::GetProperty(isolate, as_receiver,
                            isolate->factory()->match_symbol()),
      Nothing<bool>());

  const bool is_js_regexp = object->IsJSRegExp();
  if (match_value->IsUndefined(isolate)) return Just(is_js_regexp);

  const bool match_is_truthy = match_value->BooleanValue(isolate);
  if (match_is_truthy != is_js_regexp) {
    // Track both directions of disagreement separately.
    isolate->CountUsage(
        match_is_truthy ? v8::Isolate::kRegExpMatchIsTrueishOnNonJSRegExp
                        : v8::Isolate::kRegExpMatchIsFalseishOnJSRegExp);
  }

  return Just(match_is_truthy);
}

149
// Returns true iff |obj| passes every check below, in order:
//   - it is a JSReceiver whose map is the initial RegExp map,
//   - its prototype is a JSReceiver whose map is the isolate's regexp
//     prototype map,
//   - (with FLAG_track_constant_fields) the prototype's "exec" descriptor is
//     still marked const,
//   - the RegExp species lookup chain is intact,
//   - its last_index is a non-negative Smi.
// Always returns false when a forced slow path is requested (only compiled in
// under V8_ENABLE_FORCE_SLOW_PATH).
bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) {
#ifdef V8_ENABLE_FORCE_SLOW_PATH
  if (isolate->force_slow_path()) return false;
#endif

  if (!obj->IsJSReceiver()) return false;
  JSReceiver receiver = JSReceiver::cast(*obj);

  // The receiver itself must still have the initial RegExp map.
  if (receiver->map() != isolate->regexp_function()->initial_map()) {
    return false;
  }

  // The receiver's prototype must still have the expected map as well.
  Object prototype = receiver->map()->prototype();
  if (!prototype->IsJSReceiver()) return false;

  Map prototype_map = JSReceiver::cast(prototype)->map();
  if (prototype_map != *isolate->regexp_prototype_map()) return false;

  // The "exec" method on the prototype must be unmodified.
  if (FLAG_track_constant_fields) {
    // The descriptor index must refer to "exec"; this has to stay consistent
    // with the initialization order in the bootstrapper.
    DCHECK_EQ(*(isolate->factory()->exec_string()),
              prototype_map->instance_descriptors()->GetKey(
                  JSRegExp::kExecFunctionDescriptorIndex));
    const bool exec_is_const =
        prototype_map->instance_descriptors()
            ->GetDetails(JSRegExp::kExecFunctionDescriptorIndex)
            .constness() == PropertyConstness::kConst;
    if (!exec_is_const) return false;
  }

  if (!isolate->IsRegExpSpeciesLookupChainIntact()) return false;

  // Requiring a Smi lastIndex lets the fast path omit ToLength(lastIndex)
  // calls, which could otherwise execute user code.
  Object last_index = JSRegExp::cast(receiver)->last_index();
  if (!last_index->IsSmi()) return false;
  return Smi::ToInt(last_index) >= 0;
}

194
uint64_t RegExpUtils::AdvanceStringIndex(Handle<String> string, uint64_t index,
195 196 197 198 199 200 201 202
                                         bool unicode) {
  DCHECK_LE(static_cast<double>(index), kMaxSafeInteger);
  const uint64_t string_length = static_cast<uint64_t>(string->length());
  if (unicode && index < string_length) {
    const uint16_t first = string->Get(static_cast<uint32_t>(index));
    if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string_length) {
      DCHECK_LT(index, std::numeric_limits<uint64_t>::max());
      const uint16_t second = string->Get(static_cast<uint32_t>(index + 1));
203
      if (second >= 0xDC00 && second <= 0xDFFF) {
204
        return index + 2;
205 206 207 208
      }
    }
  }

209
  return index + 1;
210 211 212 213 214 215 216 217
}

// Loads "lastIndex" from |regexp|, converts it with ToLength, advances it past
// the position it denotes in |string| (see AdvanceStringIndex) and stores the
// result back via SetLastIndex. Returns an empty handle if the property load
// or the ToLength conversion fails; otherwise returns SetLastIndex's result.
MaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex(
    Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string,
    bool unicode) {
  Handle<Object> raw_last_index;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, raw_last_index,
      Object::GetProperty(isolate, regexp,
                          isolate->factory()->lastIndex_string()),
      Object);

  Handle<Object> last_index_length;
  ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_length,
                             Object::ToLength(isolate, raw_last_index), Object);

  const uint64_t current_index = PositiveNumberToUint64(*last_index_length);
  const uint64_t advanced_index =
      AdvanceStringIndex(string, current_index, unicode);
  return SetLastIndex(isolate, regexp, advanced_index);
}

}  // namespace internal
}  // namespace v8