/* * Copyright (c) 2020, Emanuel Sprung * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #pragma once #include "RegexByteCode.h" #include "RegexMatch.h" #include "RegexOptions.h" #include "RegexParser.h" #include #include #include #include #include #include #include #include namespace regex { static const constexpr size_t c_max_recursion = 5000; static const constexpr size_t c_match_preallocation_count = 0; struct RegexResult final { bool success { false }; size_t count { 0 }; Vector matches; Vector> capture_group_matches; Vector> named_capture_group_matches; size_t n_operations { 0 }; size_t n_capture_groups { 0 }; size_t n_named_capture_groups { 0 }; }; template class Regex; template class Matcher final { public: Matcher(const Regex& pattern, Optional::OptionsType> regex_options = {}) : m_pattern(pattern) , m_regex_options(regex_options.value_or({})) { } ~Matcher() = default; RegexResult match(const RegexStringView&, Optional::OptionsType> = {}) const; RegexResult match(const Vector, Optional::OptionsType> = {}) const; typename ParserTraits::OptionsType options() const { return m_regex_options; } private: Optional execute(const MatchInput& input, MatchState& state, MatchOutput& output, size_t recursion_level) const; ALWAYS_INLINE Optional execute_low_prio_forks(const MatchInput& input, MatchState& original_state, MatchOutput& output, Vector states, size_t recursion_level) const; const Regex& m_pattern; const typename ParserTraits::OptionsType m_regex_options; }; template class Regex final { public: String pattern_value; regex::Parser::Result parser_result; OwnPtr> matcher { nullptr }; mutable size_t start_offset { 0 }; explicit Regex(StringView pattern, typename ParserTraits::OptionsType regex_options = {}); ~Regex() = default; typename ParserTraits::OptionsType options() const; void print_bytecode(FILE* f = stdout) const; String error_string(Optional message = {}) const; RegexResult match(const RegexStringView view, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; return matcher->match(view, regex_options); } RegexResult match(const Vector views, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; return matcher->match(views, regex_options); } String replace(const RegexStringView view, const StringView& replacement_pattern, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; StringBuilder builder; size_t start_offset = 0; RegexResult result = matcher->match(view, regex_options); if (!result.success) return view.to_string(); for (size_t i = 0; i < result.matches.size(); ++i) { auto& match = result.matches[i]; builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string()); start_offset = match.global_offset + match.view.length(); GenericLexer lexer(replacement_pattern); while (!lexer.is_eof()) { if (lexer.consume_specific('\\')) { if (lexer.consume_specific('\\')) { builder.append('\\'); continue; } auto number = lexer.consume_while(isdigit); if (auto index = number.to_uint(); index.has_value() && result.n_capture_groups >= index.value()) { builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string()); } else { builder.appendff("\\{}", number); } } else { builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; })); } } } builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string()); return builder.to_string(); } // FIXME: replace(const Vector, ...) RegexResult search(const RegexStringView view, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; AllOptions options = (AllOptions)regex_options.value_or({}); if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) { options.reset_flag(AllFlags::MatchNotEndOfLine); options.reset_flag(AllFlags::MatchNotBeginOfLine); } options |= AllFlags::Global; return matcher->match(view, options); } RegexResult search(const Vector views, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; AllOptions options = (AllOptions)regex_options.value_or({}); if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) { options.reset_flag(AllFlags::MatchNotEndOfLine); options.reset_flag(AllFlags::MatchNotBeginOfLine); } options |= AllFlags::Global; return matcher->match(views, options); } bool match(const RegexStringView view, RegexResult& m, Optional::OptionsType> regex_options = {}) const { m = match(view, regex_options); return m.success; } bool match(const Vector views, RegexResult& m, Optional::OptionsType> regex_options = {}) const { m = match(views, regex_options); return m.success; } bool search(const RegexStringView view, RegexResult& m, Optional::OptionsType> regex_options = {}) const { m = search(view, regex_options); return m.success; } bool search(const Vector views, RegexResult& m, Optional::OptionsType> regex_options = {}) const { m = search(views, regex_options); return m.success; } bool has_match(const RegexStringView view, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return false; RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults); return result.success; } bool has_match(const Vector views, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return false; RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults); return result.success; } }; // free standing functions for match, search and has_match template RegexResult match(const RegexStringView view, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.match(view, regex_options); } template RegexResult match(const Vector view, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.match(view, regex_options); } template bool match(const RegexStringView view, Regex& pattern, RegexResult&, Optional::OptionsType> regex_options = {}) { return pattern.match(view, regex_options); } template bool match(const Vector view, Regex& pattern, RegexResult&, Optional::OptionsType> regex_options = {}) { return pattern.match(view, regex_options); } template RegexResult search(const RegexStringView view, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.search(view, regex_options); } template RegexResult search(const Vector views, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.search(views, regex_options); } template bool search(const RegexStringView view, Regex& pattern, RegexResult&, Optional::OptionsType> regex_options = {}) { return pattern.search(view, regex_options); } template bool search(const Vector views, Regex& pattern, RegexResult&, Optional::OptionsType> regex_options = {}) { return pattern.search(views, regex_options); } template bool has_match(const RegexStringView view, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.has_match(view, regex_options); } template bool has_match(const Vector views, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.has_match(views, regex_options); } } using regex::has_match; using regex::match; using regex::Regex; using regex::RegexResult;