// Copyright 2020 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "src/regexp/experimental/experimental.h" #include "src/common/assert-scope.h" #include "src/objects/js-regexp-inl.h" #include "src/regexp/experimental/experimental-compiler.h" #include "src/regexp/experimental/experimental-interpreter.h" #include "src/regexp/regexp-parser.h" #include "src/utils/ostreams.h" namespace v8 { namespace internal { bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, RegExpFlags flags, int capture_count) { DCHECK(FLAG_enable_experimental_regexp_engine || FLAG_enable_experimental_regexp_engine_on_excessive_backtracks); return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count); } void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re, Handle<String> source, RegExpFlags flags, int capture_count) { DCHECK(FLAG_enable_experimental_regexp_engine); if (FLAG_trace_experimental_regexp_engine) { StdoutStream{} << "Initializing experimental regexp " << *source << std::endl; } isolate->factory()->SetRegExpExperimentalData( re, source, JSRegExp::AsJSRegExpFlags(flags), capture_count); } bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) { DCHECK(FLAG_enable_experimental_regexp_engine); DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL); #ifdef VERIFY_HEAP re->JSRegExpVerify(isolate); #endif static constexpr bool kIsLatin1 = true; return re->bytecode(kIsLatin1) != Smi::FromInt(JSRegExp::kUninitializedValue); } template <class T> Handle<ByteArray> VectorToByteArray(Isolate* isolate, base::Vector<T> data) { STATIC_ASSERT(std::is_trivial<T>::value); int byte_length = sizeof(T) * data.length(); Handle<ByteArray> byte_array = isolate->factory()->NewByteArray(byte_length); DisallowGarbageCollection no_gc; MemCopy(byte_array->GetDataStartAddress(), data.begin(), byte_length); return byte_array; } namespace { struct CompilationResult { Handle<ByteArray> bytecode; Handle<FixedArray> capture_name_map; }; // Compiles source pattern, but doesn't change the regexp object. base::Optional<CompilationResult> CompileImpl(Isolate* isolate, Handle<JSRegExp> regexp) { Zone zone(isolate->allocator(), ZONE_NAME); Handle<String> source(regexp->source(), isolate); // Parse and compile the regexp source. RegExpCompileData parse_result; DCHECK(!isolate->has_pending_exception()); bool parse_success = RegExpParser::ParseRegExpFromHeapString( isolate, &zone, source, JSRegExp::AsRegExpFlags(regexp->flags()), &parse_result); if (!parse_success) { // The pattern was already parsed successfully during initialization, so // the only way parsing can fail now is because of stack overflow. DCHECK_EQ(parse_result.error, RegExpError::kStackOverflow); USE(RegExp::ThrowRegExpException(isolate, regexp, source, parse_result.error)); return base::nullopt; } ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile( parse_result.tree, JSRegExp::AsRegExpFlags(regexp->flags()), &zone); CompilationResult result; result.bytecode = VectorToByteArray(isolate, bytecode.ToVector()); result.capture_name_map = RegExp::CreateCaptureNameMap(isolate, parse_result.named_captures); return result; } } // namespace bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) { DCHECK(FLAG_enable_experimental_regexp_engine); DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL); #ifdef VERIFY_HEAP re->JSRegExpVerify(isolate); #endif Handle<String> source(re->source(), isolate); if (FLAG_trace_experimental_regexp_engine) { StdoutStream{} << "Compiling experimental regexp " << *source << std::endl; } base::Optional<CompilationResult> compilation_result = CompileImpl(isolate, re); if (!compilation_result.has_value()) { DCHECK(isolate->has_pending_exception()); return false; } re->set_bytecode_and_trampoline(isolate, compilation_result->bytecode); re->set_capture_name_map(compilation_result->capture_name_map); return true; } base::Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) { RegExpInstruction* inst_begin = reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress()); int inst_num = raw_bytes.length() / sizeof(RegExpInstruction); DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length()); return base::Vector<RegExpInstruction>(inst_begin, inst_num); } namespace { int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray bytecode, String subject, int capture_count, int32_t* output_registers, int32_t output_register_count, int32_t subject_index) { DisallowGarbageCollection no_gc; // TODO(cbruni): remove once gcmole is fixed. DisableGCMole no_gc_mole; int register_count_per_match = JSRegExp::RegistersForCaptureCount(capture_count); int32_t result; do { DCHECK(subject.IsFlat()); Zone zone(isolate->allocator(), ZONE_NAME); result = ExperimentalRegExpInterpreter::FindMatches( isolate, call_origin, bytecode, register_count_per_match, subject, subject_index, output_registers, output_register_count, &zone); } while (result == RegExp::kInternalRegExpRetry && call_origin == RegExp::kFromRuntime); return result; } } // namespace // Returns the number of matches. int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate, RegExp::CallOrigin call_origin, JSRegExp regexp, String subject, int32_t* output_registers, int32_t output_register_count, int32_t subject_index) { DCHECK(FLAG_enable_experimental_regexp_engine); DisallowGarbageCollection no_gc; if (FLAG_trace_experimental_regexp_engine) { StdoutStream{} << "Executing experimental regexp " << regexp.source() << std::endl; } static constexpr bool kIsLatin1 = true; ByteArray bytecode = ByteArray::cast(regexp.bytecode(kIsLatin1)); return ExecRawImpl(isolate, call_origin, bytecode, subject, regexp.capture_count(), output_registers, output_register_count, subject_index); } int32_t ExperimentalRegExp::MatchForCallFromJs( Address subject, int32_t start_position, Address input_start, Address input_end, int* output_registers, int32_t output_register_count, RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) { DCHECK(FLAG_enable_experimental_regexp_engine); DCHECK_NOT_NULL(isolate); DCHECK_NOT_NULL(output_registers); DCHECK(call_origin == RegExp::CallOrigin::kFromJs); DisallowGarbageCollection no_gc; DisallowJavascriptExecution no_js(isolate); DisallowHandleAllocation no_handles; DisallowHandleDereference no_deref; String subject_string = String::cast(Object(subject)); JSRegExp regexp_obj = JSRegExp::cast(Object(regexp)); return ExecRaw(isolate, RegExp::kFromJs, regexp_obj, subject_string, output_registers, output_register_count, start_position); } MaybeHandle<Object> ExperimentalRegExp::Exec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int subject_index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks) { DCHECK(FLAG_enable_experimental_regexp_engine); DCHECK_EQ(regexp->type_tag(), JSRegExp::EXPERIMENTAL); #ifdef VERIFY_HEAP regexp->JSRegExpVerify(isolate); #endif if (!IsCompiled(regexp, isolate) && !Compile(isolate, regexp)) { DCHECK(isolate->has_pending_exception()); return MaybeHandle<Object>(); } DCHECK(IsCompiled(regexp, isolate)); subject = String::Flatten(isolate, subject); int capture_count = regexp->capture_count(); int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count); int32_t* output_registers; std::unique_ptr<int32_t[]> output_registers_release; if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) { output_registers = isolate->jsregexp_static_offsets_vector(); } else { output_registers = NewArray<int32_t>(output_register_count); output_registers_release.reset(output_registers); } int num_matches = ExecRaw(isolate, RegExp::kFromRuntime, *regexp, *subject, output_registers, output_register_count, subject_index); if (num_matches > 0) { DCHECK_EQ(num_matches, 1); if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) { if (output_registers[0] >= subject->length()) { return isolate->factory()->null_value(); } } return RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, output_registers); } else if (num_matches == 0) { return isolate->factory()->null_value(); } else { DCHECK_LT(num_matches, 0); DCHECK(isolate->has_pending_exception()); return MaybeHandle<Object>(); } } int32_t ExperimentalRegExp::OneshotExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int32_t* output_registers, int32_t output_register_count, int32_t subject_index) { DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks); if (FLAG_trace_experimental_regexp_engine) { StdoutStream{} << "Experimental execution (oneshot) of regexp " << regexp->source() << std::endl; } base::Optional<CompilationResult> compilation_result = CompileImpl(isolate, regexp); if (!compilation_result.has_value()) return RegExp::kInternalRegExpException; DisallowGarbageCollection no_gc; return ExecRawImpl(isolate, RegExp::kFromRuntime, *compilation_result->bytecode, *subject, regexp->capture_count(), output_registers, output_register_count, subject_index); } MaybeHandle<Object> ExperimentalRegExp::OneshotExec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int subject_index, Handle<RegExpMatchInfo> last_match_info, RegExp::ExecQuirks exec_quirks) { DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks); DCHECK_NE(regexp->type_tag(), JSRegExp::NOT_COMPILED); int capture_count = regexp->capture_count(); int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count); int32_t* output_registers; std::unique_ptr<int32_t[]> output_registers_release; if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) { output_registers = isolate->jsregexp_static_offsets_vector(); } else { output_registers = NewArray<int32_t>(output_register_count); output_registers_release.reset(output_registers); } int num_matches = OneshotExecRaw(isolate, regexp, subject, output_registers, output_register_count, subject_index); if (num_matches > 0) { DCHECK_EQ(num_matches, 1); if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) { if (output_registers[0] >= subject->length()) { return isolate->factory()->null_value(); } } return RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, output_registers); } else if (num_matches == 0) { return isolate->factory()->null_value(); } else { DCHECK_LT(num_matches, 0); DCHECK(isolate->has_pending_exception()); return MaybeHandle<Object>(); } } } // namespace internal } // namespace v8