/* ***************************************************************************** * ******************* C++ wrapper for PCRE2 Library **************************** * ***************************************************************************** * Copyright (c) Md. Jahidul Hamid * * ----------------------------------------------------------------------------- * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * * The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ /** @file jpcre2.hpp * @brief Main header file for JPCRE2 library to be included by programs that uses its functionalities. * It includes the `pcre2.h` header, therefore you shouldn't include `pcre2.h`, neither should you define `PCRE2_CODE_UNIT_WIDTH` before including * `jpcre2.hpp`. * If your `pcre2.h` header is not in standard include paths, you may include `pcre2.h` with correct path before including `jpcre2.hpp` * manually. In this case you will have to define `PCRE2_CODE_UNIT_WIDTH` before including `pcre2.h`. * Make sure to link required PCRE2 libraries when compiling. * * @author [Md Jahidul Hamid](https://github.com/neurobin) */ #ifndef JPCRE2_HPP #define JPCRE2_HPP #ifndef PCRE2_CODE_UNIT_WIDTH ///@def PCRE2_CODE_UNIT_WIDTH ///This macro does not have any significance in JPCRE2 context. ///It is defined as 0 by default. Defining it before including jpcre2.hpp ///will override the default (discouraged as it will make it harder for you to detect problems), ///but still it will have no effect in a JPCRE2 perspective. ///Defining it with an invalid value will yield to compile error. #define PCRE2_CODE_UNIT_WIDTH 0 #endif //previous inclusion of pcre2.h will be respected and we won't try to include it twice. //Thus one can pre-include pcre2.h from an arbitrary/non-standard path. #ifndef PCRE2_MAJOR #include // pcre2 header #endif #include // std::string, std::wstring #include // std::vector #include // std::map #include // std::fprintf #include // CHAR_BIT #include // std::abort() #if __cplusplus >= 201103L || _MSVC_LANG >= 201103L #define JPCRE2_USE_MINIMUM_CXX_11 1 #include #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK #include // std::function #endif #endif #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L #define JPCRE2_USE_MINIMUM_CXX_17 1 #include #else #ifdef JPCRE2_UNSET_CAPTURES_NULL #error JPCRE2_UNSET_CAPTURES_NULL requires C++17 #endif #endif #define JPCRE2_UNUSED(x) ((void)(x)) #if defined(NDEBUG) || defined(JPCRE2_NDEBUG) #define JPCRE2_ASSERT(cond, msg) ((void)0) #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) ((void)0) #else #define JPCRE2_ASSERT(cond, msg) jpcre2::jassert(cond, msg, __FILE__, __LINE__) #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) jpcre2::_jvassert(cond, name, __FILE__, __LINE__) #endif // In Windows, Windows.h defines ERROR macro // It conflicts with our jpcre2::ERROR namespace #ifdef ERROR #undef ERROR #endif /** @namespace jpcre2 * Top level namespace of JPCRE2. * * All functions, classes/structs, constants, enums that are provided by JPCRE2 belong to this namespace while * **PCRE2** structs, functions, constants remain outside of its scope. * * If you want to use any PCRE2 functions or constants, * remember that they are in the global scope and should be used as such. */ namespace jpcre2 { ///Define for JPCRE2 version. ///It can be used to support changes in different versions of the lib. #define JPCRE2_VERSION 103201L /** @namespace jpcre2::INFO * Namespace to provide information about JPCRE2 library itself. * Contains constant Strings with version info. */ namespace INFO { static const char NAME[] = "JPCRE2"; ///< Name of the project static const char FULL_VERSION[] = "10.32.01"; ///< Full version string static const char VERSION_GENRE[] = "10"; ///< Generation, depends on original PCRE2 version static const char VERSION_MAJOR[] = "32"; ///< Major version, updated when API change is made static const char VERSION_MINOR[] = "01"; ///< Minor version, includes bug fix or minor feature upgrade static const char VERSION_PRE_RELEASE[] = ""; ///< Alpha or beta (testing) release version } typedef PCRE2_SIZE SIZE_T; ///< Used for match count and vector size typedef uint32_t Uint; ///< Used for options (bitwise operation) typedef uint8_t Ush; ///< 8 bit unsigned integer. typedef std::vector VecOff; ///< vector of size_t. typedef std::vector VecOpt; ///< vector for Uint option values. /// @namespace jpcre2::ERROR /// Namespace for error codes. namespace ERROR { /** Error numbers for JPCRE2. * JPCRE2 error numbers are positive integers while * PCRE2 error numbers are negative integers. */ enum { INVALID_MODIFIER = 2, ///< Invalid modifier was detected INSUFFICIENT_OVECTOR = 3 ///< Ovector was not big enough during a match }; } /** These constants provide JPCRE2 options. */ enum { NONE = 0x0000000u, ///< Option 0 (zero) FIND_ALL = 0x0000002u, ///< Find all during match (global match) JIT_COMPILE = 0x0000004u ///< Perform JIT compilation for optimization }; //enableif and is_same implementation template struct EnableIf{}; template struct EnableIf{typedef T Type;}; template struct IsSame{ static const bool value = false; }; template struct IsSame{ static const bool value = true; }; ///JPCRE2 assert function. ///Aborts with an error message if condition fails. ///@param cond boolean condition ///@param msg message (std::string) ///@param f file where jassert was called. ///@param line line number where jassert was called. static inline void jassert(bool cond, const char* msg, const char* f, size_t line){ if(!cond) { std::fprintf(stderr,"\n\tE: AssertionFailure\n%s\nAssertion failed in file: %s\t at line: %u\n", msg, f, (unsigned)line); std::abort(); } } static inline void _jvassert(bool cond, char const * name, const char* f, size_t line){ jassert(cond, (std::string("ValueError: \n\ Required data vector of type ")+std::string(name)+" is empty.\n\ Your MatchEvaluator callback function is not\n\ compatible with existing data!!\n\ You are trying to use a vector that does not\n\ have any match data. Either call nreplace() or replace()\n\ with true or perform a match with appropriate\n\ callback function. For more details, refer to\n\ the doc in MatchEvaluator section.").c_str(), f, line); } static inline std::string _tostdstring(unsigned x){ char buf[128]; int written = std::sprintf(buf, "%u", x); return (written > 0) ? std::string(buf, buf + written) : std::string(); } ////////////////////////// The following are type and function mappings from PCRE2 interface to JPCRE2 interface ///////////////////////// //forward declaration template struct Pcre2Type; template struct Pcre2Func; //PCRE2 types //These templated types will be used in place of actual types template struct Pcre2Type {}; template<> struct Pcre2Type<8>{ //typedefs used typedef PCRE2_UCHAR8 Pcre2Uchar; typedef PCRE2_SPTR8 Pcre2Sptr; typedef pcre2_code_8 Pcre2Code; typedef pcre2_compile_context_8 CompileContext; typedef pcre2_match_data_8 MatchData; typedef pcre2_general_context_8 GeneralContext; typedef pcre2_match_context_8 MatchContext; typedef pcre2_jit_callback_8 JitCallback; typedef pcre2_jit_stack_8 JitStack; }; template<> struct Pcre2Type<16>{ //typedefs used typedef PCRE2_UCHAR16 Pcre2Uchar; typedef PCRE2_SPTR16 Pcre2Sptr; typedef pcre2_code_16 Pcre2Code; typedef pcre2_compile_context_16 CompileContext; typedef pcre2_match_data_16 MatchData; typedef pcre2_general_context_16 GeneralContext; typedef pcre2_match_context_16 MatchContext; typedef pcre2_jit_callback_16 JitCallback; typedef pcre2_jit_stack_16 JitStack; }; template<> struct Pcre2Type<32>{ //typedefs used typedef PCRE2_UCHAR32 Pcre2Uchar; typedef PCRE2_SPTR32 Pcre2Sptr; typedef pcre2_code_32 Pcre2Code; typedef pcre2_compile_context_32 CompileContext; typedef pcre2_match_data_32 MatchData; typedef pcre2_general_context_32 GeneralContext; typedef pcre2_match_context_32 MatchContext; typedef pcre2_jit_callback_32 JitCallback; typedef pcre2_jit_stack_32 JitStack; }; //wrappers for PCRE2 functions template struct Pcre2Func{}; //8-bit version template<> struct Pcre2Func<8> { static Pcre2Type<8>::CompileContext* compile_context_create(Pcre2Type<8>::GeneralContext *gcontext){ return pcre2_compile_context_create_8(gcontext); } static void compile_context_free(Pcre2Type<8>::CompileContext *ccontext){ pcre2_compile_context_free_8(ccontext); } static Pcre2Type<8>::CompileContext* compile_context_copy(Pcre2Type<8>::CompileContext* ccontext){ return pcre2_compile_context_copy_8(ccontext); } static const unsigned char * maketables(Pcre2Type<8>::GeneralContext* gcontext){ return pcre2_maketables_8(gcontext); } static int set_character_tables(Pcre2Type<8>::CompileContext * ccontext, const unsigned char * table){ return pcre2_set_character_tables_8(ccontext, table); } static Pcre2Type<8>::Pcre2Code * compile(Pcre2Type<8>::Pcre2Sptr pattern, PCRE2_SIZE length, uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, Pcre2Type<8>::CompileContext *ccontext){ return pcre2_compile_8(pattern, length, options, errorcode, erroroffset, ccontext); } static int jit_compile(Pcre2Type<8>::Pcre2Code *code, uint32_t options){ return pcre2_jit_compile_8(code, options); } static int substitute( const Pcre2Type<8>::Pcre2Code *code, Pcre2Type<8>::Pcre2Sptr subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, Pcre2Type<8>::MatchData *match_data, Pcre2Type<8>::MatchContext *mcontext, Pcre2Type<8>::Pcre2Sptr replacement, PCRE2_SIZE rlength, Pcre2Type<8>::Pcre2Uchar *outputbuffer, PCRE2_SIZE *outlengthptr){ return pcre2_substitute_8( code, subject, length, startoffset, options, match_data, mcontext, replacement, rlength, outputbuffer, outlengthptr); } //~ static int substring_get_bynumber(Pcre2Type<8>::MatchData *match_data, //~ uint32_t number, //~ Pcre2Type<8>::Pcre2Uchar **bufferptr, //~ PCRE2_SIZE *bufflen){ //~ return pcre2_substring_get_bynumber_8(match_data, number, bufferptr, bufflen); //~ } //~ static int substring_get_byname(Pcre2Type<8>::MatchData *match_data, //~ Pcre2Type<8>::Pcre2Sptr name, //~ Pcre2Type<8>::Pcre2Uchar **bufferptr, //~ PCRE2_SIZE *bufflen){ //~ return pcre2_substring_get_byname_8(match_data, name, bufferptr, bufflen); //~ } //~ static void substring_free(Pcre2Type<8>::Pcre2Uchar *buffer){ //~ pcre2_substring_free_8(buffer); //~ } //~ static Pcre2Type<8>::Pcre2Code * code_copy(const Pcre2Type<8>::Pcre2Code *code){ //~ return pcre2_code_copy_8(code); //~ } static void code_free(Pcre2Type<8>::Pcre2Code *code){ pcre2_code_free_8(code); } static int get_error_message( int errorcode, Pcre2Type<8>::Pcre2Uchar *buffer, PCRE2_SIZE bufflen){ return pcre2_get_error_message_8(errorcode, buffer, bufflen); } static Pcre2Type<8>::MatchData * match_data_create_from_pattern( const Pcre2Type<8>::Pcre2Code *code, Pcre2Type<8>::GeneralContext *gcontext){ return pcre2_match_data_create_from_pattern_8(code, gcontext); } static int match( const Pcre2Type<8>::Pcre2Code *code, Pcre2Type<8>::Pcre2Sptr subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, Pcre2Type<8>::MatchData *match_data, Pcre2Type<8>::MatchContext *mcontext){ return pcre2_match_8(code, subject, length, startoffset, options, match_data, mcontext); } static void match_data_free(Pcre2Type<8>::MatchData *match_data){ pcre2_match_data_free_8(match_data); } static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<8>::MatchData *match_data){ return pcre2_get_ovector_pointer_8(match_data); } static int pattern_info(const Pcre2Type<8>::Pcre2Code *code, uint32_t what, void *where){ return pcre2_pattern_info_8(code, what, where); } static int set_newline(Pcre2Type<8>::CompileContext *ccontext, uint32_t value){ return pcre2_set_newline_8(ccontext, value); } //~ static void jit_stack_assign(Pcre2Type<8>::MatchContext *mcontext, //~ Pcre2Type<8>::JitCallback callback_function, //~ void *callback_data){ //~ pcre2_jit_stack_assign_8(mcontext, callback_function, callback_data); //~ } //~ static Pcre2Type<8>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize, //~ Pcre2Type<8>::GeneralContext *gcontext){ //~ return pcre2_jit_stack_create_8(startsize, maxsize, gcontext); //~ } //~ static void jit_stack_free(Pcre2Type<8>::JitStack *jit_stack){ //~ pcre2_jit_stack_free_8(jit_stack); //~ } //~ static void jit_free_unused_memory(Pcre2Type<8>::GeneralContext *gcontext){ //~ pcre2_jit_free_unused_memory_8(gcontext); //~ } //~ static Pcre2Type<8>::MatchContext *match_context_create(Pcre2Type<8>::GeneralContext *gcontext){ //~ return pcre2_match_context_create_8(gcontext); //~ } //~ static Pcre2Type<8>::MatchContext *match_context_copy(Pcre2Type<8>::MatchContext *mcontext){ //~ return pcre2_match_context_copy_8(mcontext); //~ } //~ static void match_context_free(Pcre2Type<8>::MatchContext *mcontext){ //~ pcre2_match_context_free_8(mcontext); //~ } static uint32_t get_ovector_count(Pcre2Type<8>::MatchData *match_data){ return pcre2_get_ovector_count_8(match_data); } }; //16-bit version template<> struct Pcre2Func<16> { static Pcre2Type<16>::CompileContext* compile_context_create(Pcre2Type<16>::GeneralContext *gcontext){ return pcre2_compile_context_create_16(gcontext); } static void compile_context_free(Pcre2Type<16>::CompileContext *ccontext){ pcre2_compile_context_free_16(ccontext); } static Pcre2Type<16>::CompileContext* compile_context_copy(Pcre2Type<16>::CompileContext* ccontext){ return pcre2_compile_context_copy_16(ccontext); } static const unsigned char * maketables(Pcre2Type<16>::GeneralContext* gcontext){ return pcre2_maketables_16(gcontext); } static int set_character_tables(Pcre2Type<16>::CompileContext * ccontext, const unsigned char * table){ return pcre2_set_character_tables_16(ccontext, table); } static Pcre2Type<16>::Pcre2Code * compile(Pcre2Type<16>::Pcre2Sptr pattern, PCRE2_SIZE length, uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, Pcre2Type<16>::CompileContext *ccontext){ return pcre2_compile_16(pattern, length, options, errorcode, erroroffset, ccontext); } static int jit_compile(Pcre2Type<16>::Pcre2Code *code, uint32_t options){ return pcre2_jit_compile_16(code, options); } static int substitute( const Pcre2Type<16>::Pcre2Code *code, Pcre2Type<16>::Pcre2Sptr subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, Pcre2Type<16>::MatchData *match_data, Pcre2Type<16>::MatchContext *mcontext, Pcre2Type<16>::Pcre2Sptr replacement, PCRE2_SIZE rlength, Pcre2Type<16>::Pcre2Uchar *outputbuffer, PCRE2_SIZE *outlengthptr){ return pcre2_substitute_16( code, subject, length, startoffset, options, match_data, mcontext, replacement, rlength, outputbuffer, outlengthptr); } //~ static int substring_get_bynumber(Pcre2Type<16>::MatchData *match_data, //~ uint32_t number, //~ Pcre2Type<16>::Pcre2Uchar **bufferptr, //~ PCRE2_SIZE *bufflen){ //~ return pcre2_substring_get_bynumber_16(match_data, number, bufferptr, bufflen); //~ } //~ static int substring_get_byname(Pcre2Type<16>::MatchData *match_data, //~ Pcre2Type<16>::Pcre2Sptr name, //~ Pcre2Type<16>::Pcre2Uchar **bufferptr, //~ PCRE2_SIZE *bufflen){ //~ return pcre2_substring_get_byname_16(match_data, name, bufferptr, bufflen); //~ } //~ static void substring_free(Pcre2Type<16>::Pcre2Uchar *buffer){ //~ pcre2_substring_free_16(buffer); //~ } //~ static Pcre2Type<16>::Pcre2Code * code_copy(const Pcre2Type<16>::Pcre2Code *code){ //~ return pcre2_code_copy_16(code); //~ } static void code_free(Pcre2Type<16>::Pcre2Code *code){ pcre2_code_free_16(code); } static int get_error_message( int errorcode, Pcre2Type<16>::Pcre2Uchar *buffer, PCRE2_SIZE bufflen){ return pcre2_get_error_message_16(errorcode, buffer, bufflen); } static Pcre2Type<16>::MatchData * match_data_create_from_pattern( const Pcre2Type<16>::Pcre2Code *code, Pcre2Type<16>::GeneralContext *gcontext){ return pcre2_match_data_create_from_pattern_16(code, gcontext); } static int match( const Pcre2Type<16>::Pcre2Code *code, Pcre2Type<16>::Pcre2Sptr subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, Pcre2Type<16>::MatchData *match_data, Pcre2Type<16>::MatchContext *mcontext){ return pcre2_match_16(code, subject, length, startoffset, options, match_data, mcontext); } static void match_data_free(Pcre2Type<16>::MatchData *match_data){ pcre2_match_data_free_16(match_data); } static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<16>::MatchData *match_data){ return pcre2_get_ovector_pointer_16(match_data); } static int pattern_info(const Pcre2Type<16>::Pcre2Code *code, uint32_t what, void *where){ return pcre2_pattern_info_16(code, what, where); } static int set_newline(Pcre2Type<16>::CompileContext *ccontext, uint32_t value){ return pcre2_set_newline_16(ccontext, value); } //~ static void jit_stack_assign(Pcre2Type<16>::MatchContext *mcontext, //~ Pcre2Type<16>::JitCallback callback_function, //~ void *callback_data){ //~ pcre2_jit_stack_assign_16(mcontext, callback_function, callback_data); //~ } //~ static Pcre2Type<16>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize, //~ Pcre2Type<16>::GeneralContext *gcontext){ //~ return pcre2_jit_stack_create_16(startsize, maxsize, gcontext); //~ } //~ static void jit_stack_free(Pcre2Type<16>::JitStack *jit_stack){ //~ pcre2_jit_stack_free_16(jit_stack); //~ } //~ static void jit_free_unused_memory(Pcre2Type<16>::GeneralContext *gcontext){ //~ pcre2_jit_free_unused_memory_16(gcontext); //~ } //~ static Pcre2Type<16>::MatchContext *match_context_create(Pcre2Type<16>::GeneralContext *gcontext){ //~ return pcre2_match_context_create_16(gcontext); //~ } //~ static Pcre2Type<16>::MatchContext *match_context_copy(Pcre2Type<16>::MatchContext *mcontext){ //~ return pcre2_match_context_copy_16(mcontext); //~ } //~ static void match_context_free(Pcre2Type<16>::MatchContext *mcontext){ //~ pcre2_match_context_free_16(mcontext); //~ } static uint32_t get_ovector_count(Pcre2Type<16>::MatchData *match_data){ return pcre2_get_ovector_count_16(match_data); } }; //32-bit version template<> struct Pcre2Func<32> { static Pcre2Type<32>::CompileContext* compile_context_create(Pcre2Type<32>::GeneralContext *gcontext){ return pcre2_compile_context_create_32(gcontext); } static void compile_context_free(Pcre2Type<32>::CompileContext *ccontext){ pcre2_compile_context_free_32(ccontext); } static Pcre2Type<32>::CompileContext* compile_context_copy(Pcre2Type<32>::CompileContext* ccontext){ return pcre2_compile_context_copy_32(ccontext); } static const unsigned char * maketables(Pcre2Type<32>::GeneralContext* gcontext){ return pcre2_maketables_32(gcontext); } static int set_character_tables(Pcre2Type<32>::CompileContext * ccontext, const unsigned char * table){ return pcre2_set_character_tables_32(ccontext, table); } static Pcre2Type<32>::Pcre2Code * compile(Pcre2Type<32>::Pcre2Sptr pattern, PCRE2_SIZE length, uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, Pcre2Type<32>::CompileContext *ccontext){ return pcre2_compile_32(pattern, length, options, errorcode, erroroffset, ccontext); } static int jit_compile(Pcre2Type<32>::Pcre2Code *code, uint32_t options){ return pcre2_jit_compile_32(code, options); } static int substitute( const Pcre2Type<32>::Pcre2Code *code, Pcre2Type<32>::Pcre2Sptr subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, Pcre2Type<32>::MatchData *match_data, Pcre2Type<32>::MatchContext *mcontext, Pcre2Type<32>::Pcre2Sptr replacement, PCRE2_SIZE rlength, Pcre2Type<32>::Pcre2Uchar *outputbuffer, PCRE2_SIZE *outlengthptr){ return pcre2_substitute_32( code, subject, length, startoffset, options, match_data, mcontext, replacement, rlength, outputbuffer, outlengthptr); } //~ static int substring_get_bynumber(Pcre2Type<32>::MatchData *match_data, //~ uint32_t number, //~ Pcre2Type<32>::Pcre2Uchar **bufferptr, //~ PCRE2_SIZE *bufflen){ //~ return pcre2_substring_get_bynumber_32(match_data, number, bufferptr, bufflen); //~ } //~ static int substring_get_byname(Pcre2Type<32>::MatchData *match_data, //~ Pcre2Type<32>::Pcre2Sptr name, //~ Pcre2Type<32>::Pcre2Uchar **bufferptr, //~ PCRE2_SIZE *bufflen){ //~ return pcre2_substring_get_byname_32(match_data, name, bufferptr, bufflen); //~ } //~ static void substring_free(Pcre2Type<32>::Pcre2Uchar *buffer){ //~ pcre2_substring_free_32(buffer); //~ } //~ static Pcre2Type<32>::Pcre2Code * code_copy(const Pcre2Type<32>::Pcre2Code *code){ //~ return pcre2_code_copy_32(code); //~ } static void code_free(Pcre2Type<32>::Pcre2Code *code){ pcre2_code_free_32(code); } static int get_error_message( int errorcode, Pcre2Type<32>::Pcre2Uchar *buffer, PCRE2_SIZE bufflen){ return pcre2_get_error_message_32(errorcode, buffer, bufflen); } static Pcre2Type<32>::MatchData * match_data_create_from_pattern( const Pcre2Type<32>::Pcre2Code *code, Pcre2Type<32>::GeneralContext *gcontext){ return pcre2_match_data_create_from_pattern_32(code, gcontext); } static int match( const Pcre2Type<32>::Pcre2Code *code, Pcre2Type<32>::Pcre2Sptr subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, Pcre2Type<32>::MatchData *match_data, Pcre2Type<32>::MatchContext *mcontext){ return pcre2_match_32(code, subject, length, startoffset, options, match_data, mcontext); } static void match_data_free(Pcre2Type<32>::MatchData *match_data){ pcre2_match_data_free_32(match_data); } static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<32>::MatchData *match_data){ return pcre2_get_ovector_pointer_32(match_data); } static int pattern_info(const Pcre2Type<32>::Pcre2Code *code, uint32_t what, void *where){ return pcre2_pattern_info_32(code, what, where); } static int set_newline(Pcre2Type<32>::CompileContext *ccontext, uint32_t value){ return pcre2_set_newline_32(ccontext, value); } //~ static void jit_stack_assign(Pcre2Type<32>::MatchContext *mcontext, //~ Pcre2Type<32>::JitCallback callback_function, //~ void *callback_data){ //~ pcre2_jit_stack_assign_32(mcontext, callback_function, callback_data); //~ } //~ static Pcre2Type<32>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize, //~ Pcre2Type<32>::GeneralContext *gcontext){ //~ return pcre2_jit_stack_create_32(startsize, maxsize, gcontext); //~ } //~ static void jit_stack_free(Pcre2Type<32>::JitStack *jit_stack){ //~ pcre2_jit_stack_free_32(jit_stack); //~ } //~ static void jit_free_unused_memory(Pcre2Type<32>::GeneralContext *gcontext){ //~ pcre2_jit_free_unused_memory_32(gcontext); //~ } //~ static Pcre2Type<32>::MatchContext *match_context_create(Pcre2Type<32>::GeneralContext *gcontext){ //~ return pcre2_match_context_create_32(gcontext); //~ } //~ static Pcre2Type<32>::MatchContext *match_context_copy(Pcre2Type<32>::MatchContext *mcontext){ //~ return pcre2_match_context_copy_32(mcontext); //~ } //~ static void match_context_free(Pcre2Type<32>::MatchContext *mcontext){ //~ pcre2_match_context_free_32(mcontext); //~ } static uint32_t get_ovector_count(Pcre2Type<32>::MatchData *match_data){ return pcre2_get_ovector_count_32(match_data); } }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///Class to take a std::string modifier value with null safety. ///You don't need to make an instance of this class to pass modifier, ///just pass std::string or char const*, whatever seems feasible, ///implicit conversion will kick in and take care of things for you. class Modifier{ std::string mod; public: ///Default constructor. Modifier(){} ///Constructor that takes a std::string. ///@param x std::string as a reference. Modifier(std::string const& x):mod(x){} ///Constructor that takes char const * (null safety is provided by this one) ///@param x char const * Modifier(char const *x):mod(x?x:""){} ///Returns the modifier string ///@return modifier string (std::string) std::string str() const { return mod; } ///Returns the c_str() of modifier string ///@return char const * char const * c_str() const { return mod.c_str(); } ///Returns the length of the modifier string ///@return length SIZE_T length() const{ return mod.length(); } ///operator[] overload to access character by index. ///@param i index ///@return character at index i. char operator[](SIZE_T i) const { return mod[i]; } }; // Namespace for modifier constants. // For each modifier constant there is a jpcre2::Uint option value. // Some modifiers may have multiple values set together (ORed in bitwise operation) and // thus they may include other modifiers. Such an example is the 'n' modifier. It is combined together with 'u'. namespace MOD { // Define modifiers for compile // String of compile modifier characters for PCRE2 options static const char C_N[] = "eijmnsuxADJU"; // Array of compile modifier values for PCRE2 options // Uint is being used in getModifier() in for loop to get the number of element in this array, // be sure to chnage there if you change here. static const jpcre2::Uint C_V[12] = { PCRE2_MATCH_UNSET_BACKREF, // Modifier e PCRE2_CASELESS, // Modifier i PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF, // Modifier j PCRE2_MULTILINE, // Modifier m PCRE2_UTF | PCRE2_UCP, // Modifier n (includes u) PCRE2_DOTALL, // Modifier s PCRE2_UTF, // Modifier u PCRE2_EXTENDED, // Modifier x PCRE2_ANCHORED, // Modifier A PCRE2_DOLLAR_ENDONLY, // Modifier D PCRE2_DUPNAMES, // Modifier J PCRE2_UNGREEDY // Modifier U }; // String of compile modifier characters for JPCRE2 options static const char CJ_N[] = "S"; // Array of compile modifier values for JPCRE2 options static const jpcre2::Uint CJ_V[1] = { JIT_COMPILE, // Modifier S }; // Define modifiers for replace // String of action (replace) modifier characters for PCRE2 options static const char R_N[] = "eEgx"; // Array of action (replace) modifier values for PCRE2 options static const jpcre2::Uint R_V[4] = { PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier e PCRE2_SUBSTITUTE_UNKNOWN_UNSET | PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier E (includes e) PCRE2_SUBSTITUTE_GLOBAL, // Modifier g PCRE2_SUBSTITUTE_EXTENDED // Modifier x }; // String of action (replace) modifier characters for JPCRE2 options static const char RJ_N[] = ""; // Array of action (replace) modifier values for JPCRE2 options static const jpcre2::Uint RJ_V[1] = { NONE //placeholder }; // Define modifiers for match // String of action (match) modifier characters for PCRE2 options static const char M_N[] = "A"; // Array of action (match) modifier values for PCRE2 options static const jpcre2::Uint M_V[1] = { PCRE2_ANCHORED // Modifier A }; // String of action (match) modifier characters for JPCRE2 options static const char MJ_N[] = "g"; // Array of action (match) modifier values for JPCRE2 options static const jpcre2::Uint MJ_V[1] = { FIND_ALL, // Modifier g }; static inline void toOption(Modifier const& mod, bool x, Uint const * J_V, char const * J_N, SIZE_T SJ, Uint const * V, char const * N, SIZE_T S, Uint* po, Uint* jo, int* en, SIZE_T* eo ){ //loop through mod SIZE_T n = mod.length(); for (SIZE_T i = 0; i < n; ++i) { //First check for JPCRE2 mods for(SIZE_T j = 0; j < SJ; ++j){ if(J_N[j] == mod[i]) { if(x) *jo |= J_V[j]; else *jo &= ~J_V[j]; goto endfor; } } //Now check for PCRE2 mods for(SIZE_T j = 0; j< S; ++j){ if(N[j] == mod[i]){ if(x) *po |= V[j]; else *po &= ~V[j]; goto endfor; } } //Modifier didn't match, invalid modifier *en = (int)ERROR::INVALID_MODIFIER; *eo = (int)mod[i]; endfor:; } } static inline void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){ toOption(mod, x, MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint), M_V, M_N, sizeof(M_V)/sizeof(Uint), po, jo, en, eo); } static inline void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){ toOption(mod, x, RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint), R_V, R_N, sizeof(R_V)/sizeof(Uint), po, jo, en, eo); } static inline void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){ toOption(mod, x, CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint), C_V, C_N, sizeof(C_V)/sizeof(Uint), po, jo, en, eo); } static inline std::string fromOption(Uint const * J_V, char const * J_N, SIZE_T SJ, Uint const * V, char const * N, SIZE_T S, Uint po, Uint jo ){ std::string mod; //Calculate PCRE2 mod for(SIZE_T i = 0; i < S; ++i){ if( (V[i] & po) != 0 && (V[i] & po) == V[i]) //One option can include other mod += N[i]; } //Calculate JPCRE2 mod for(SIZE_T i = 0; i < SJ; ++i){ if( (J_V[i] & jo) != 0 && (J_V[i] & jo) == J_V[i]) //One option can include other mod += J_N[i]; } return mod; } static inline std::string fromMatchOption(Uint po, Uint jo){ return fromOption(MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint), M_V, M_N, sizeof(M_V)/sizeof(Uint), po, jo); } static inline std::string fromReplaceOption(Uint po, Uint jo){ return fromOption(RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint), R_V, R_N, sizeof(R_V)/sizeof(Uint), po, jo); } static inline std::string fromCompileOption(Uint po, Uint jo){ return fromOption(CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint), C_V, C_N, sizeof(C_V)/sizeof(Uint), po, jo); } } //MOD namespace ends ///Lets you create custom modifier tables. ///An instance of this class can be passed to ///match, replace or compile related class objects. class ModifierTable{ std::string tabjms; std::string tabms; std::string tabjrs; std::string tabrs; std::string tabjcs; std::string tabcs; VecOpt tabjmv; VecOpt tabmv; VecOpt tabjrv; VecOpt tabrv; VecOpt tabjcv; VecOpt tabcv; void toOption(Modifier const& mod, bool x, VecOpt const& J_V, std::string const& J_N, VecOpt const& V, std::string const& N, Uint* po, Uint* jo, int* en, SIZE_T* eo ) const{ SIZE_T SJ = J_V.size(); SIZE_T S = V.size(); JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str()); JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str()); MOD::toOption(mod, x, J_V.empty()?0:&J_V[0], J_N.c_str(), SJ, V.empty()?0:&V[0], N.c_str(), S, po, jo, en, eo ); } std::string fromOption(VecOpt const& J_V, std::string const& J_N, VecOpt const& V, std::string const& N, Uint po, Uint jo) const{ SIZE_T SJ = J_V.size(); SIZE_T S = V.size(); JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str()); JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str()); return MOD::fromOption(J_V.empty()?0:&J_V[0], J_N.c_str(), SJ, V.empty()?0:&V[0], N.c_str(), S, po, jo); } void parseModifierTable(std::string& tabjs, VecOpt& tabjv, std::string& tab_s, VecOpt& tab_v, std::string const& tabs, VecOpt const& tabv); public: ///Default constructor that creates an empty modifier table. ModifierTable(){} ///@overload ///@param deflt Initialize with default table if true, otherwise keep empty. ModifierTable(bool deflt){ if(deflt) setAllToDefault(); } ///Reset the match modifier table to its initial (empty) state including memory. ///@return A reference to the calling ModifierTable object. ModifierTable& resetMatchModifierTable(){ std::string().swap(tabjms); std::string().swap(tabms); VecOpt().swap(tabjmv); VecOpt().swap(tabmv); return *this; } ///Reset the replace modifier table to its initial (empty) state including memory. ///@return A reference to the calling ModifierTable object. ModifierTable& resetReplaceModifierTable(){ std::string().swap(tabjrs); std::string().swap(tabrs); VecOpt().swap(tabjrv); VecOpt().swap(tabrv); return *this; } ///Reset the compile modifier table to its initial (empty) state including memory. ///@return A reference to the calling ModifierTable object. ModifierTable& resetCompileModifierTable(){ std::string().swap(tabjcs); std::string().swap(tabcs); VecOpt().swap(tabjcv); VecOpt().swap(tabcv); return *this; } ///Reset the modifier tables to their initial (empty) state including memory. ///@return A reference to the calling ModifierTable object. ModifierTable& reset(){ resetMatchModifierTable(); resetReplaceModifierTable(); resetCompileModifierTable(); return *this; } ///Clear the match modifier table to its initial (empty) state. ///Memory may retain for further use. ///@return A reference to the calling ModifierTable object. ModifierTable& clearMatchModifierTable(){ tabjms.clear(); tabms.clear(); tabjmv.clear(); tabmv.clear(); return *this; } ///Clear the replace modifier table to its initial (empty) state. ///Memory may retain for further use. ///@return A reference to the calling ModifierTable object. ModifierTable& clearReplaceModifierTable(){ tabjrs.clear(); tabrs.clear(); tabjrv.clear(); tabrv.clear(); return *this; } ///Clear the compile modifier table to its initial (empty) state. ///Memory may retain for further use. ///@return A reference to the calling ModifierTable object. ModifierTable& clearCompileModifierTable(){ tabjcs.clear(); tabcs.clear(); tabjcv.clear(); tabcv.clear(); return *this; } ///Clear the modifier tables to their initial (empty) state. ///Memory may retain for further use. ///@return A reference to the calling ModifierTable object. ModifierTable& clear(){ clearMatchModifierTable(); clearReplaceModifierTable(); clearCompileModifierTable(); return *this; } ///Modifier parser for match related options. ///@param mod modifier string ///@param x whether to add or remove the modifers. ///@param po pointer to PCRE2 match option that will be modified. ///@param jo pointer to JPCRE2 match option that will be modified. ///@param en where to put the error number. ///@param eo where to put the error offset. void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const { toOption(mod, x,tabjmv,tabjms,tabmv, tabms,po,jo,en,eo); } ///Modifier parser for replace related options. ///@param mod modifier string ///@param x whether to add or remove the modifers. ///@param po pointer to PCRE2 replace option that will be modified. ///@param jo pointer to JPCRE2 replace option that will be modified. ///@param en where to put the error number. ///@param eo where to put the error offset. void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const { return toOption(mod, x,tabjrv,tabjrs,tabrv,tabrs,po,jo,en,eo); } ///Modifier parser for compile related options. ///@param mod modifier string ///@param x whether to add or remove the modifers. ///@param po pointer to PCRE2 compile option that will be modified. ///@param jo pointer to JPCRE2 compile option that will be modified. ///@param en where to put the error number. ///@param eo where to put the error offset. void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const { return toOption(mod, x,tabjcv,tabjcs,tabcv,tabcs,po,jo,en,eo); } ///Take match related option value and convert to modifier string. ///@param po PCRE2 option. ///@param jo JPCRE2 option. ///@return modifier string (std::string) std::string fromMatchOption(Uint po, Uint jo) const { return fromOption(tabjmv,tabjms,tabmv,tabms,po,jo); } ///Take replace related option value and convert to modifier string. ///@param po PCRE2 option. ///@param jo JPCRE2 option. ///@return modifier string (std::string) std::string fromReplaceOption(Uint po, Uint jo) const { return fromOption(tabjrv,tabjrs,tabrv,tabrs,po,jo); } ///Take compile related option value and convert to modifier string. ///@param po PCRE2 option. ///@param jo JPCRE2 option. ///@return modifier string (std::string) std::string fromCompileOption(Uint po, Uint jo) const { return fromOption(tabjcv,tabjcs,tabcv,tabcs,po,jo); } ///Set modifier table for match. ///Takes a string and a vector of sequential options. ///@param tabs modifier string (list of modifiers) ///@param tabv vector of Uint (options). ///@return A reference to the calling ModifierTable object. ModifierTable& setMatchModifierTable(std::string const& tabs, VecOpt const& tabv){ parseModifierTable(tabjms, tabjmv, tabms, tabmv, tabs, tabv); return *this; } ///Set modifier table for match. ///Takes a string and an array of sequential options. ///@param tabs modifier string (list of modifiers) ///@param tabvp array of Uint (options). If null, table is set to empty. ///@return A reference to the calling ModifierTable object. ModifierTable& setMatchModifierTable(std::string const& tabs, const Uint* tabvp){ if(tabvp) { VecOpt tabv(tabvp, tabvp + tabs.length()); setMatchModifierTable(tabs, tabv); } else clearMatchModifierTable(); return *this; } ///@overload ///... ///This one takes modifier and value by array. ///If the arrays are not of the same length, the behavior is undefined. ///If any of the argument is null, the table is set empty. ///@param tabsp modifier string (list of modifiers). ///@param tabvp array of Uint (options). ///@return A reference to the calling ModifierTable object. ModifierTable& setMatchModifierTable(const char* tabsp, const Uint* tabvp){ if(tabsp && tabvp) { std::string tabs(tabsp); VecOpt tabv(tabvp, tabvp + tabs.length()); setMatchModifierTable(tabs, tabv); } else clearMatchModifierTable(); return *this; } ///Set modifier table for replace. ///Takes a string and a vector of sequential options. ///@param tabs modifier string (list of modifiers) ///@param tabv vector of Uint (options). ///@return A reference to the calling ModifierTable object. ModifierTable& setReplaceModifierTable(std::string const& tabs, VecOpt const& tabv){ parseModifierTable(tabjrs, tabjrv, tabrs, tabrv, tabs, tabv); return *this; } ///Set modifier table for replace. ///Takes a string and an array of sequential options. ///@param tabs modifier string (list of modifiers) ///@param tabvp array of Uint (options). If null, table is set to empty. ///@return A reference to the calling ModifierTable object. ModifierTable& setReplaceModifierTable(std::string const& tabs, const Uint* tabvp){ if(tabvp) { VecOpt tabv(tabvp, tabvp + tabs.length()); setReplaceModifierTable(tabs, tabv); } else clearReplaceModifierTable(); return *this; } ///@overload ///... ///This one takes modifier and value by array. ///If the arrays are not of the same length, the behavior is undefined. ///If any of the argument is null, the table is set empty. ///@param tabsp modifier string (list of modifiers). ///@param tabvp array of Uint (options). ///@return A reference to the calling ModifierTable object. ModifierTable& setReplaceModifierTable(const char* tabsp, const Uint* tabvp){ if(tabsp && tabvp) { std::string tabs(tabsp); VecOpt tabv(tabvp, tabvp + tabs.length()); setReplaceModifierTable(tabs, tabv); } else clearReplaceModifierTable(); return *this; } ///Set modifier table for compile. ///Takes a string and a vector of sequential options. ///@param tabs modifier string (list of modifiers) ///@param tabv vector of Uint (options). ///@return A reference to the calling ModifierTable object. ModifierTable& setCompileModifierTable(std::string const& tabs, VecOpt const& tabv){ parseModifierTable(tabjcs, tabjcv, tabcs, tabcv, tabs, tabv); return *this; } ///Set modifier table for compile. ///Takes a string and an array of sequential options. ///@param tabs modifier string (list of modifiers) ///@param tabvp array of Uint (options). If null, table is set to empty. ///@return A reference to the calling ModifierTable object. ModifierTable& setCompileModifierTable(std::string const& tabs, const Uint* tabvp){ if(tabvp) { VecOpt tabv(tabvp, tabvp + tabs.length()); setCompileModifierTable(tabs, tabv); } else clearCompileModifierTable(); return *this; } ///@overload ///... ///This one takes modifier and value by array. ///If the arrays are not of the same length, the behavior is undefined. ///If any of the argument is null, the table is set empty. ///@param tabsp modifier string (list of modifiers). ///@param tabvp array of Uint (options). ///@return A reference to the calling ModifierTable object. ModifierTable& setCompileModifierTable(const char* tabsp, const Uint* tabvp){ if(tabsp && tabvp) { std::string tabs(tabsp); VecOpt tabv(tabvp, tabvp + tabs.length()); setCompileModifierTable(tabs, tabv); } else clearCompileModifierTable(); return *this; } ///Set match modifie table to default ///@return A reference to the calling ModifierTable object. ModifierTable& setMatchModifierTableToDefault(){ tabjms = std::string(MOD::MJ_N, MOD::MJ_N + sizeof(MOD::MJ_V)/sizeof(Uint)); tabms = std::string(MOD::M_N, MOD::M_N + sizeof(MOD::M_V)/sizeof(Uint)); tabjmv = VecOpt(MOD::MJ_V, MOD::MJ_V + sizeof(MOD::MJ_V)/sizeof(Uint)); tabmv = VecOpt(MOD::M_V, MOD::M_V + sizeof(MOD::M_V)/sizeof(Uint)); return *this; } ///Set replace modifier table to default. ///@return A reference to the calling ModifierTable object. ModifierTable& setReplaceModifierTableToDefault(){ tabjrs = std::string(MOD::RJ_N, MOD::RJ_N + sizeof(MOD::RJ_V)/sizeof(Uint)); tabrs = std::string(MOD::R_N, MOD::R_N + sizeof(MOD::R_V)/sizeof(Uint)); tabjrv = VecOpt(MOD::RJ_V, MOD::RJ_V + sizeof(MOD::RJ_V)/sizeof(Uint)); tabrv = VecOpt(MOD::R_V, MOD::R_V + sizeof(MOD::R_V)/sizeof(Uint)); return *this; } ///Set compile modifier table to default. ///@return A reference to the calling ModifierTable object. ModifierTable& setCompileModifierTableToDefault(){ tabjcs = std::string(MOD::CJ_N, MOD::CJ_N + sizeof(MOD::CJ_V)/sizeof(Uint)); tabcs = std::string(MOD::C_N, MOD::C_N + sizeof(MOD::C_V)/sizeof(Uint)); tabjcv = VecOpt(MOD::CJ_V, MOD::CJ_V + sizeof(MOD::CJ_V)/sizeof(Uint)); tabcv = VecOpt(MOD::C_V, MOD::C_V + sizeof(MOD::C_V)/sizeof(Uint)); return *this; } ///Set all tables to default. ///@return A reference to the calling ModifierTable object. ModifierTable& setAllToDefault(){ setMatchModifierTableToDefault(); setReplaceModifierTableToDefault(); setCompileModifierTableToDefault(); return *this; } }; //These message strings are used for error/warning message construction. //take care to prevent multiple definition template struct MSG{ static std::basic_string INVALID_MODIFIER(void); static std::basic_string INSUFFICIENT_OVECTOR(void); }; //specialization template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return "Invalid modifier: "; } template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return L"Invalid modifier: "; } template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return "ovector wasn't big enough"; } template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return L"ovector wasn't big enough"; } #ifdef JPCRE2_USE_MINIMUM_CXX_11 template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return u"Invalid modifier: "; } template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return U"Invalid modifier: "; } template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return u"ovector wasn't big enough"; } template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return U"ovector wasn't big enough"; } #endif ///struct to select the types. /// ///@tparam Char_T Character type (`char`, `wchar_t`, `char16_t`, `char32_t`) ///@tparam Map Optional parameter (Only `>= C++11`) to specify a map container (`std::map`, `std::unordered_map` etc..). Default is `std::map`. /// ///The character type (`Char_T`) must be in accordance with the PCRE2 library you are linking against. ///If not sure which library you need, link against all 3 PCRE2 libraries and they will be used as needed. /// ///If you want to be specific, then here's the rule: /// ///1. If `Char_T` is 8 bit, you need 8 bit PCRE2 library ///2. If `Char_T` is 16 bit, you need 16 bit PCRE2 library ///3. If `Char_T` is 32 bit, you need 32 bit PCRE2 library ///4. if `Char_T` is not 8 or 16 or 32 bit, you will get compile error. /// ///In `>= C++11` you get an additional optional template parameter to specify a map container. ///For example, you can use `std::unordered_map` instead of the default `std::map`: /// ```cpp /// #include /// typedef jpcre2::select jp; /// ``` /// ///We will use the following typedef throughout this doc: ///```cpp ///typedef jpcre2::select jp; ///``` #ifdef JPCRE2_USE_MINIMUM_CXX_11 template class Map=std::map> #else template #endif struct select{ ///Typedef for character (`char`, `wchar_t`, `char16_t`, `char32_t`) typedef Char_T Char; //typedef Char_T Char; ///Typedef for string (`std::string`, `std::wstring`, `std::u16string`, `std::u32string`). ///Defined as `std::basic_string`. ///May be this list will make more sense: ///Character | String ///--------- | ------- ///char | std::string ///wchar_t | std::wstring ///char16_t | std::u16string (>=C++11) ///char32_t | std::u32string (>=C++11) typedef typename std::basic_string String; #ifdef JPCRE2_USE_MINIMUM_CXX_11 ///Map for Named substrings. typedef class Map MapNas; ///Substring name to Substring number map. typedef class Map MapNtN; #else ///Map for Named substrings. typedef typename std::map MapNas; ///Substring name to Substring number map. typedef typename std::map MapNtN; #endif ///Allow spelling mistake of MapNtN as MapNtn. typedef MapNtN MapNtn; ///Vector for Numbered substrings (Sub container). #ifdef JPCRE2_UNSET_CAPTURES_NULL typedef typename std::vector> NumSub; #else typedef typename std::vector NumSub; #endif ///Vector of matches with named substrings. typedef typename std::vector VecNas; ///Vector of substring name to substring number map. typedef typename std::vector VecNtN; ///Allow spelling mistake of VecNtN as VecNtn. typedef VecNtN VecNtn; ///Vector of matches with numbered substrings. typedef typename std::vector VecNum; //These are to shorten the code typedef typename Pcre2Type::Pcre2Uchar Pcre2Uchar; typedef typename Pcre2Type::Pcre2Sptr Pcre2Sptr; typedef typename Pcre2Type::Pcre2Code Pcre2Code; typedef typename Pcre2Type::CompileContext CompileContext; typedef typename Pcre2Type::MatchData MatchData; typedef typename Pcre2Type::GeneralContext GeneralContext; typedef typename Pcre2Type::MatchContext MatchContext; typedef typename Pcre2Type::JitCallback JitCallback; typedef typename Pcre2Type::JitStack JitStack; template static String toString(T); //prevent implicit type conversion of T ///Converts a Char_T to jpcre2::select::String ///@param a Char_T ///@return jpcre2::select::String static String toString(Char a){ return a?String(1, a):String(); } ///@overload ///... ///Converts a Char_T const * to jpcre2::select::String ///@param a Char_T const * ///@return jpcre2::select::String static String toString(Char const *a){ return a?String(a):String(); } ///@overload ///... ///Converts a Char_T* to jpcre2::select::String ///@param a Char_T const * ///@return jpcre2::select::String static String toString(Char* a){ return a?String(a):String(); } ///@overload ///... ///Converts a PCRE2_UCHAR to String ///@param a PCRE2_UCHAR ///@return jpcre2::select::String static String toString(Pcre2Uchar* a) { return a?String((Char*) a):String(); } ///Retruns error message from PCRE2 error number ///@param err_num error number (negative) ///@return message as jpcre2::select::String. static String getPcre2ErrorMessage(int err_num) { Pcre2Uchar buffer[sizeof(Char)*CHAR_BIT*1024]; Pcre2Func::get_error_message(err_num, buffer, sizeof(buffer)); return toString((Pcre2Uchar*) buffer); } ///Returns error message (either JPCRE2 or PCRE2) from error number and error offset ///@param err_num error number (negative for PCRE2, positive for JPCRE2) ///@param err_off error offset ///@return message as jpcre2::select::String. static String getErrorMessage(int err_num, int err_off) { if(err_num == (int)ERROR::INVALID_MODIFIER){ return MSG::INVALID_MODIFIER() + toString((Char)err_off); } else if(err_num == (int)ERROR::INSUFFICIENT_OVECTOR){ return MSG::INSUFFICIENT_OVECTOR(); } else if(err_num != 0) { return getPcre2ErrorMessage((int) err_num); } else return String(); } //forward declaration class Regex; class RegexMatch; class RegexReplace; class MatchEvaluator; /** Provides public constructors to create RegexMatch objects. * Every RegexMatch object should be associated with a Regex object. * This class stores a pointer to its' associated Regex object, thus when * the content of the associated Regex object is changed, there will be no need to * set the pointer again. * * Examples: * * ```cpp * jp::Regex re; * jp::RegexMatch rm; * rm.setRegexObject(&re); * rm.match("subject", "g"); // 0 match * re.compile("\\w"); * rm.match(); // 7 matches * ``` */ class RegexMatch { private: friend class MatchEvaluator; Regex const *re; String m_subject; String const *m_subject_ptr; Uint match_opts; Uint jpcre2_match_opts; MatchContext *mcontext; ModifierTable const * modtab; MatchData * mdata; PCRE2_SIZE _start_offset; //name collision, use _ at start VecNum* vec_num; VecNas* vec_nas; VecNtN* vec_ntn; VecOff* vec_soff; VecOff* vec_eoff; bool getNumberedSubstrings(int, Pcre2Sptr, PCRE2_SIZE*, uint32_t); bool getNamedSubstrings(int, int, Pcre2Sptr, Pcre2Sptr, PCRE2_SIZE*); void init_vars() { re = 0; vec_num = 0; vec_nas = 0; vec_ntn = 0; vec_soff = 0; vec_eoff = 0; match_opts = 0; jpcre2_match_opts = 0; error_number = 0; error_offset = 0; _start_offset = 0; m_subject_ptr = &m_subject; mcontext = 0; modtab = 0; mdata = 0; } void onlyCopy(RegexMatch const &rm){ re = rm.re; //only pointer should be copied //pointer to subject may point to m_subject or other user data m_subject_ptr = (rm.m_subject_ptr == &rm.m_subject) ? &m_subject //not &rm.m_subject : rm.m_subject_ptr; //underlying data of vectors are not handled by RegexMatch //thus it's safe to just copy the pointers. vec_num = rm.vec_num; vec_nas = rm.vec_nas; vec_ntn = rm.vec_ntn; vec_soff = rm.vec_soff; vec_eoff = rm.vec_eoff; match_opts = rm.match_opts; jpcre2_match_opts = rm.jpcre2_match_opts; error_number = rm.error_number; error_offset = rm.error_offset; _start_offset = rm._start_offset; mcontext = rm.mcontext; modtab = rm.modtab; mdata = rm.mdata; } void deepCopy(RegexMatch const &rm){ m_subject = rm.m_subject; onlyCopy(rm); } #ifdef JPCRE2_USE_MINIMUM_CXX_11 void deepMove(RegexMatch& rm){ m_subject = std::move_if_noexcept(rm.m_subject); onlyCopy(rm); } #endif friend class Regex; protected: int error_number; PCRE2_SIZE error_offset; public: ///Default constructor. RegexMatch(){ init_vars(); } ///@overload ///... ///Creates a RegexMatch object associating a Regex object. ///Underlying data is not modified. ///@param r pointer to a Regex object RegexMatch(Regex const *r) { init_vars(); re = r; } ///@overload ///... ///Copy constructor. ///@param rm Reference to RegexMatch object RegexMatch(RegexMatch const &rm){ init_vars(); deepCopy(rm); } ///Overloaded copy-assignment operator. ///@param rm RegexMatch object ///@return A reference to the calling RegexMatch object. virtual RegexMatch& operator=(RegexMatch const &rm){ if(this == &rm) return *this; deepCopy(rm); return *this; } #ifdef JPCRE2_USE_MINIMUM_CXX_11 ///@overload ///... ///Move constructor. ///This constructor steals resources from the argument. ///It leaves the argument in a valid but indeterminate sate. ///The indeterminate state can be returned to normal by calling reset() on that object. ///@param rm rvalue reference to a RegexMatch object RegexMatch(RegexMatch&& rm){ init_vars(); deepMove(rm); } ///@overload ///... ///Overloaded move-assignment operator. ///This constructor steals resources from the argument. ///It leaves the argument in a valid but indeterminate sate. ///The indeterminate state can be returned to normal by calling reset() on that object. ///@param rm rvalue reference to a RegexMatch object ///@return A reference to the calling RegexMatch object. virtual RegexMatch& operator=(RegexMatch&& rm){ if(this == &rm) return *this; deepMove(rm); return *this; } #endif ///Destructor ///Frees all internal memories that were used. virtual ~RegexMatch() {} ///Reset all class variables to its default (initial) state including memory. ///Data in the vectors will retain (as it's external) ///You will need to pass vector pointers again after calling this function to get match results. ///@return Reference to the calling RegexMatch object. virtual RegexMatch& reset() { String().swap(m_subject); //not ptr , external string won't be modified. init_vars(); return *this; } ///Clear all class variables (may retain some memory for further use). ///Data in the vectors will retain (as it's external) ///You will need to pass vector pointers again after calling this function to get match results. ///@return Reference to the calling RegexMatch object. virtual RegexMatch& clear(){ m_subject.clear(); //not ptr , external string won't be modified. init_vars(); return *this; } ///reset match related errors to zero. ///If you want to examine the error status of a function call in the method chain, ///add this function just before your target function so that the error is set to zero ///before that target function is called, and leave everything out after the target ///function so that there will be no additional errors from other function calls. ///@return A reference to the RegexMatch object ///@see Regex::resetErrors() ///@see RegexReplace::resetErrors() virtual RegexMatch& resetErrors(){ error_number = 0; error_offset = 0; return *this; } /// Returns the last error number ///@return Last error number virtual int getErrorNumber() const { return error_number; } /// Returns the last error offset ///@return Last error offset virtual int getErrorOffset() const { return (int)error_offset; } /// Returns the last error message ///@return Last error message virtual String getErrorMessage() const { #ifdef JPCRE2_USE_MINIMUM_CXX_11 return select::getErrorMessage(error_number, error_offset); #else return select::getErrorMessage(error_number, error_offset); #endif } ///Get subject string (by value). ///@return subject string ///@see RegexReplace::getSubject() virtual String getSubject() const { return *m_subject_ptr; } ///Get pointer to subject string. ///Data can not be changed with this pointer. ///@return constant subject string pointer ///@see RegexReplace::getSubjectPointer() virtual String const * getSubjectPointer() const { return m_subject_ptr; } /// Calculate modifier string from PCRE2 and JPCRE2 options and return it. /// /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized /// as long as you don't do that explicitly. Calling RegexMatch::setModifier() will re-set them. /// /// **Mixed or combined modifier**. /// /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options /// get set, and when you remove the 'n' modifier (with `RegexMatch::changeModifier()`), both will get removed. ///@return Calculated modifier string (std::string) ///@see Regex::getModifier() ///@see RegexReplace::getModifier() virtual std::string getModifier() const { return modtab ? modtab->fromMatchOption(match_opts, jpcre2_match_opts) : MOD::fromMatchOption(match_opts, jpcre2_match_opts); } ///Get the modifier table that is set, ///@return pointer to constant ModifierTable. virtual ModifierTable const* getModifierTable(){ return modtab; } ///Get PCRE2 option ///@return PCRE2 option for match operation ///@see Regex::getPcre2Option() ///@see RegexReplace::getPcre2Option() virtual Uint getPcre2Option() const { return match_opts; } /// Get JPCRE2 option ///@return JPCRE2 options for math operation ///@see Regex::getJpcre2Option() ///@see RegexReplace::getJpcre2Option() virtual Uint getJpcre2Option() const { return jpcre2_match_opts; } /// Get offset from where match will start in the subject. /// @return Start offset virtual PCRE2_SIZE getStartOffset() const { return _start_offset; } ///Get pre-set match start offset vector pointer. ///The pointer must be set with RegexMatch::setMatchStartOffsetVector() beforehand ///for this to work i.e it is just a convenience method to get the pre-set vector pointer. ///@return pointer to the const match start offset vector virtual VecOff const* getMatchStartOffsetVector() const { return vec_soff; } ///Get pre-set match end offset vector pointer. ///The pointer must be set with RegexMatch::setMatchEndOffsetVector() beforehand ///for this to work i.e it is just a convenience method to get the pre-set vector pointer. ///@return pointer to the const end offset vector virtual VecOff const* getMatchEndOffsetVector() const { return vec_eoff; } ///Get a pointer to the associated Regex object. ///If no actual Regex object is associated, null is returned. ///@return A pointer to the associated constant Regex object or null. virtual Regex const * getRegexObject() const { return re; } ///Get pointer to numbered substring vector. ///@return Pointer to const numbered substring vector. virtual VecNum const* getNumberedSubstringVector() const { return vec_num; } ///Get pointer to named substring vector. ///@return Pointer to const named substring vector. virtual VecNas const* getNamedSubstringVector() const { return vec_nas; } ///Get pointer to name to number map vector. ///@return Pointer to const name to number map vector. virtual VecNtN const* getNameToNumberMapVector() const { return vec_ntn; } ///Set the associated regex object. ///Null pointer unsets it. ///Underlying data is not modified. ///@param r Pointer to a Regex object. ///@return Reference to the calling RegexMatch object. virtual RegexMatch& setRegexObject(Regex const *r){ re = r; return *this; } /// Set a pointer to the numbered substring vector. /// Null pointer unsets it. /// /// This vector will be filled with numbered (indexed) captured groups. /// @param v pointer to the numbered substring vector /// @return Reference to the calling RegexMatch object virtual RegexMatch& setNumberedSubstringVector(VecNum* v) { vec_num = v; return *this; } /// Set a pointer to the named substring vector. /// Null pointer unsets it. /// /// This vector will be populated with named captured groups. /// @param v pointer to the named substring vector /// @return Reference to the calling RegexMatch object virtual RegexMatch& setNamedSubstringVector(VecNas* v) { vec_nas = v; return *this; } /// Set a pointer to the name to number map vector. /// Null pointer unsets it. /// /// This vector will be populated with name to number map for captured groups. /// @param v pointer to the name to number map vector /// @return Reference to the calling RegexMatch object virtual RegexMatch& setNameToNumberMapVector(VecNtN* v) { vec_ntn = v; return *this; } /// Set the pointer to a vector to store the offsets where matches /// start in the subject. /// Null pointer unsets it. /// @param v Pointer to a jpcre2::VecOff vector (std::vector) /// @return Reference to the calling RegexMatch object virtual RegexMatch& setMatchStartOffsetVector(VecOff* v){ vec_soff = v; return *this; } /// Set the pointer to a vector to store the offsets where matches /// end in the subject. /// Null pointer unsets it. /// @param v Pointer to a VecOff vector (std::vector) /// @return Reference to the calling RegexMatch object virtual RegexMatch& setMatchEndOffsetVector(VecOff* v){ vec_eoff = v; return *this; } ///Set the subject string for match. ///This makes a copy of the subject string. /// @param s Subject string /// @return Reference to the calling RegexMatch object /// @see RegexReplace::setSubject() virtual RegexMatch& setSubject(String const &s) { m_subject = s; m_subject_ptr = &m_subject; //must overwrite return *this; } ///@overload ///... /// Works with the original without modifying it. Null pointer unsets the subject. /// @param s Pointer to subject string /// @return Reference to the calling RegexMatch object /// @see RegexReplace::setSubject() virtual RegexMatch& setSubject(String const *s) { if(s) m_subject_ptr = s; else { m_subject.clear(); m_subject_ptr = &m_subject; } return *this; } /// Set the modifier (resets all JPCRE2 and PCRE2 options) by calling RegexMatch::changeModifier(). /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier to set their equivalent options. /// @param s Modifier string. /// @return Reference to the calling RegexMatch object /// @see RegexReplace::setModifier() /// @see Regex::setModifier() virtual RegexMatch& setModifier(Modifier const& s) { match_opts = 0; jpcre2_match_opts = 0; changeModifier(s, true); return *this; } ///Set a custom modifier table to be used. ///@param mdt pointer to ModifierTable object. ///@return Reference to the calling RegexMatch object. virtual RegexMatch& setModifierTable(ModifierTable const * mdt){ modtab = mdt; return *this; } /// Set JPCRE2 option for match (resets all) /// @param x Option value /// @return Reference to the calling RegexMatch object /// @see RegexReplace::setJpcre2Option() /// @see Regex::setJpcre2Option() virtual RegexMatch& setJpcre2Option(Uint x) { jpcre2_match_opts = x; return *this; } ///Set PCRE2 option match (overwrite existing option) /// @param x Option value /// @return Reference to the calling RegexMatch object /// @see RegexReplace::setPcre2Option() /// @see Regex::setPcre2Option() virtual RegexMatch& setPcre2Option(Uint x) { match_opts = x; return *this; } /// Set whether to perform global match /// @param x True or False /// @return Reference to the calling RegexMatch object virtual RegexMatch& setFindAll(bool x) { jpcre2_match_opts = x?jpcre2_match_opts | FIND_ALL:jpcre2_match_opts & ~FIND_ALL; return *this; } ///@overload ///... ///This function just calls RegexMatch::setFindAll(bool x) with `true` as the parameter ///@return Reference to the calling RegexMatch object virtual RegexMatch& setFindAll() { return setFindAll(true); } /// Set offset from where match starts. /// When FIND_ALL is set, a global match would not be performed on all positions on the subject, /// rather it will be performed from the start offset and onwards. /// @param offset Start offset /// @return Reference to the calling RegexMatch object virtual RegexMatch& setStartOffset(PCRE2_SIZE offset) { _start_offset = offset; return *this; } ///Set the match context. ///You can create match context using the native PCRE2 API. ///The memory is not handled by RegexMatch object and not freed. ///User will be responsible for freeing the memory of the match context. ///@param match_context Pointer to the match context. ///@return Reference to the calling RegexMatch object virtual RegexMatch& setMatchContext(MatchContext *match_context){ mcontext = match_context; return *this; } ///Return pointer to the match context that was previously set with setMatchContext(). ///Handling memory is the callers' responsibility. ///@return pointer to the match context (default: null). virtual MatchContext* getMatchContext(){ return mcontext; } ///Set the match data block to be used. ///The memory is not handled by RegexMatch object and not freed. ///User will be responsible for freeing the memory of the match data block. ///@param madt Pointer to a match data block. ///@return Reference to the calling RegexMatch object virtual RegexMatch& setMatchDataBlock(MatchData* madt){ mdata = madt; return *this; } ///Get the pointer to the match data block that was set previously with setMatchData() ///Handling memory is the callers' responsibility. ///@return pointer to the match data (default: null). virtual MatchData* getMatchDataBlock(){ return mdata; } /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options. /// This function does not initialize or re-initialize options. /// If you want to set options from scratch, initialize them to 0 before calling this function. /// If invalid modifier is detected, then the error number for the RegexMatch /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character. /// You can get the message with RegexMatch::getErrorMessage() function. /// /// @param mod Modifier string. /// @param x Whether to add or remove option /// @return Reference to the RegexMatch object /// @see Regex::changeModifier() /// @see RegexReplace::changeModifier() virtual RegexMatch& changeModifier(Modifier const& mod, bool x){ modtab ? modtab->toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset) : MOD::toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset); return *this; } /// Add or remove a JPCRE2 option /// @param opt JPCRE2 option value /// @param x Add the option if it's true, remove otherwise. /// @return Reference to the calling RegexMatch object /// @see RegexReplace::changeJpcre2Option() /// @see Regex::changeJpcre2Option() virtual RegexMatch& changeJpcre2Option(Uint opt, bool x) { jpcre2_match_opts = x ? jpcre2_match_opts | opt : jpcre2_match_opts & ~opt; return *this; } /// Add or remove a PCRE2 option /// @param opt PCRE2 option value /// @param x Add the option if it's true, remove otherwise. /// @return Reference to the calling RegexMatch object /// @see RegexReplace::changePcre2Option() /// @see Regex::changePcre2Option() virtual RegexMatch& changePcre2Option(Uint opt, bool x) { match_opts = x ? match_opts | opt : match_opts & ~opt; return *this; } /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options. /// This is just a wrapper of the original function RegexMatch::changeModifier() /// @param mod Modifier string. /// @return Reference to the calling RegexMatch object /// @see RegexReplace::addModifier() /// @see Regex::addModifier() virtual RegexMatch& addModifier(Modifier const& mod){ return changeModifier(mod, true); } /// Add option to existing JPCRE2 options for match /// @param x Option value /// @return Reference to the calling RegexMatch object /// @see RegexReplace::addJpcre2Option() /// @see Regex::addJpcre2Option() virtual RegexMatch& addJpcre2Option(Uint x) { jpcre2_match_opts |= x; return *this; } /// Add option to existing PCRE2 options for match /// @param x Option value /// @return Reference to the calling RegexMatch object /// @see RegexReplace::addPcre2Option() /// @see Regex::addPcre2Option() virtual RegexMatch& addPcre2Option(Uint x) { match_opts |= x; return *this; } /// Perform match operation using info from class variables and return the match count and /// store the results in specified vectors. /// /// Note: This function uses pcre2_match() function to do the match. ///@return Match count virtual SIZE_T match(void); }; ///This class contains a typedef of a function pointer or a templated function wrapper (`std::function`) ///to provide callback function to the `MatchEvaluator`. ///`std::function` is used when `>=C++11` is being used , otherwise function pointer is used. ///You can force using function pointer instead of `std::function` when `>=C++11` is used by defining the macro ///`JPCRE2_USE_FUNCTION_POINTER_CALLBACK` before including jpcre2.hpp. ///If you are using lambda function with capture, you must use the `std::function` approach. /// ///The callback function takes exactly three positional arguments: ///@tparam T1 The first argument must be `jp::NumSub const &` aka `std::vector const &` (or `void*` if not needed). ///@tparam T2 The second argument must be `jp::MapNas const &` aka `std::map const &` (or `void*` if not needed). ///@tparam T3 The third argument must be `jp::MapNtN const &` aka `std::map const &` (or `void*` if not needed). /// /// **Examples:** /// ```cpp /// typedef jpcre2::select jp; /// jp::String myCallback1(jp::NumSub const &m1, void*, void*){ /// return "("+m1[0]+")"; /// } /// /// jp::String myCallback2(jp::NumSub const &m1, jp::MapNas const &m2, void*){ /// return "("+m1[0]+"/"+m2.at("total")+")"; /// } /// //Now you can pass these functions in MatchEvaluator constructors to create a match evaluator /// jp::MatchEvaluator me1(myCallback1); /// /// //Examples with lambda (>=C++11) /// jp::MatchEvaluator me2([](jp::NumSub const &m1, void*, void*) /// { /// return "("+m1[0]+")"; /// }); /// ``` ///@see MatchEvaluator template struct MatchEvaluatorCallback{ #if !defined JPCRE2_USE_FUNCTION_POINTER_CALLBACK && JPCRE2_USE_MINIMUM_CXX_11 typedef std::function Callback; #else typedef String (*Callback)(T1,T2,T3); #endif }; ///Provides some default static callback functions. ///The primary goal of this class is to provide default ///callback function to MatchEvaluator default constructor which is ///essentially callback::erase. ///This class does not allow object instantiation. struct callback{ ///Callback function that removes the matched part/s in the subject string /// and takes all match vectors as argument. ///Even though this function itself does not use the vectors, it still takes them ///so that the caller can perform a match and populate all the match data to perform ///further evaluation of other callback functions without doing the match again. ///@param num jp::NumSub vector. ///@param nas jp::MapNas map. ///@param ntn jp::MapNtN map. ///@return empty string. static String eraseFill(NumSub const &num, MapNas const &nas, MapNtN const &ntn){ return String(); } ///Callback function that removes the matched part/s in the subject string ///and does not take any match vector. ///This is a minimum cost pattern deleting callback function. /// ///It's the default callback function when you Instantiate ///a MatchEvaluator object with its default constructor: ///```cpp ///MatchEvaluator me; ///``` ///@return empty string. static String erase(void*, void*, void*){ return String(); } ///Callback function for populating match vectors that does not modify the subject string. ///It always returns the total matched part and thus the subject string remains the same. ///@param num jp::NumSub vector. ///@param nas jp::MapNas map. ///@param ntn jp::MapNtN map. ///@return total match (group 0) of current match. static String fill(NumSub const &num, MapNas const &nas, MapNtn const &ntn){ #ifdef JPCRE2_UNSET_CAPTURES_NULL return *num[0]; #else return num[0]; #endif } private: //prevent object instantiation. callback(); callback(callback const &); #ifdef JPCRE2_USE_MINIMUM_CXX_11 callback(callback&&); #endif ~callback(); }; ///This class inherits RegexMatch and provides a similar functionality. ///All public member functions from RegexMatch class are publicly available except the following: ///* setNumberedSubstringVector ///* setNamedSubstringVector ///* setNameToNumberMapVector ///* setMatchStartOffsetVector ///* setMatchEndOffsetVector /// ///The use of above functions is not allowed as the vectors are created according to the callback function you pass. /// ///Each constructor of this class takes a callback function as argument (see `MatchEvaluatorCallback`). /// ///It provides a MatchEvaluator::nreplace() function to perform replace operation using native JPCRE2 approach ///and `MatchEvaluator::replace()` function for PCRE2 compatible replace operation. /// ///An instance of this class can also be passed with `RegexReplace::nreplace()` or `RegexReplace::replace()` function to perform replacement ///according to this match evaluator. /// ///Match data is stored in vectors, and the vectors are populated according to the callback functions. ///Populated vector data is never deleted but they get overwritten. Vector data can be manually zeroed out ///by calling `MatchEvaluator::clearMatchData()`. If the capacities of those match vectors are desired to ///to be shrinked too instead of just clearing them, use `MatchEvaluator::resetMatchData()` instead. /// /// # Re-usability of Match Data /// A match data populated with a callback function that takes only a jp::NumSub vector is not compatible /// with the data created according to callback function with a jp::MapNas vector. /// Because, for this later callback, jp::MapNas data is required but is not available (only jp::NumSub is available). /// In such cases, previous Match data can not be used to perform a new replacment operation with this second callback function. /// /// To populate the match vectors, one must call the `MatchEvaluator::match()` or `MatchEvaluator::nreplace()` function, they will populate /// vectors with match data according to call back function. /// /// ## Example: /// /// ```cpp /// jp::String callback5(NumSub const &m, void*, MapNtn const &n){ /// return m[0]; /// } /// jp::String callback4(void*, void*, MapNtn const &n){ /// return std::to_string(n.at("name")); //position of group 'name'. /// } /// jp::String callback2(void*, MapNas const &m, void*){ /// return m.at('name'); //substring by name /// } /// /// jp::MatchEvaluator me; /// me.setRegexObject(&re).setSubject("string").setCallback(callback5).nreplace(); /// //In above, nreplace() populates jp::NumSub and jp::MapNtn with match data. /// /// me.setCallback(callback4).nreplace(false); /// //the above uses previous match result (note the 'false') which is OK, /// //because, callback4 requires jp::MapNtn which was made available in the previous operation. /// /// //but the following is not OK: (assertion failure) /// me.setCallback(callback2).nreplace(false); /// //because, callback2 requires jp::MapNas data which is not available. /// //now, this is OK: /// me.setCallback(callback2).nreplace(); /// //because, it will recreate those match data including this one (jp::MapNas). /// ``` /// /// # Replace options /// MatchEvaluator can not take replace options. /// Replace options are taken directly by the replace functions: `nreplace()` and `replace()`. /// /// # Using as a match object /// As it's just a subclass of RegexMatch, it can do all the things that RegexMatch can do, with some restrictions: /// * matching options are modified to strip off bad options according to replacement (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT). /// * match depends on the callback function. Only those vectors will be populated that are implemented by the callback functions so far /// (multiple callback function will set multiple match data vectors.) /// * match vectors are internal to this class, you can not set them manually (without callback function). (you can get pointers to these vectors /// with `getNumberedSubstringVector()` and related functions). /// ///@see MatchEvaluatorCallback ///@see RegexReplace::nreplace() class MatchEvaluator: virtual public RegexMatch{ private: friend class RegexReplace; VecNum vec_num; VecNas vec_nas; VecNtN vec_ntn; VecOff vec_soff; VecOff vec_eoff; int callbackn; typename MatchEvaluatorCallback::Callback callback0; typename MatchEvaluatorCallback::Callback callback1; typename MatchEvaluatorCallback::Callback callback2; typename MatchEvaluatorCallback::Callback callback3; typename MatchEvaluatorCallback::Callback callback4; typename MatchEvaluatorCallback::Callback callback5; typename MatchEvaluatorCallback::Callback callback6; typename MatchEvaluatorCallback::Callback callback7; //Q: Why the callback names seem random? is it random? //A: No, it's not random, NumSub = 1, MapNas = 2, MapNtn = 4, thus: // NumSub + MapNas = 3 // NumSub + MapNtn = 5 // MapNas + MapNtn = 6 // NumSub + MapNas + MapNtn = 7 //Q: Why is it like this? //A: It's historical. Once, there was not this many callback declaration, there was only one (a templated one). // The nreplace function itself used to calculate a mode value according to available vectors // and determine what kind of callback function needed to be called. //Q: Why the history changed? //A: We had some compatibility issues with the single templated callback. // Also, this approach proved to be more readable and robust. PCRE2_SIZE buffer_size; void init(){ callbackn = 0; callback0 = callback::erase; callback1 = 0; callback2 = 0; callback3 = 0; callback4 = 0; callback5 = 0; callback6 = 0; callback7 = 0; setMatchStartOffsetVector(&vec_soff); setMatchEndOffsetVector(&vec_eoff); buffer_size = 0; } void setVectorPointersAccordingToCallback(){ switch(callbackn){ case 0: break; case 1: setNumberedSubstringVector(&vec_num);break; case 2: setNamedSubstringVector(&vec_nas);break; case 3: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas);break; case 4: setNameToNumberMapVector(&vec_ntn);break; case 5: setNumberedSubstringVector(&vec_num).setNameToNumberMapVector(&vec_ntn);break; case 6: setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break; case 7: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break; } } void onlyCopy(MatchEvaluator const &me){ callbackn = me.callbackn; callback0 = me.callback0; callback1 = me.callback1; callback2 = me.callback2; callback3 = me.callback3; callback4 = me.callback4; callback5 = me.callback5; callback6 = me.callback6; callback7 = me.callback7; //must update the pointers to point to this class vectors. setVectorPointersAccordingToCallback(); buffer_size = me.buffer_size; } void deepCopy(MatchEvaluator const &me) { vec_num = me.vec_num; vec_nas = me.vec_nas; vec_ntn = me.vec_ntn; vec_soff = me.vec_soff; vec_eoff = me.vec_eoff; onlyCopy(me); } #ifdef JPCRE2_USE_MINIMUM_CXX_11 void deepMove(MatchEvaluator& me){ vec_num = std::move_if_noexcept(me.vec_num); vec_nas = std::move_if_noexcept(me.vec_nas); vec_ntn = std::move_if_noexcept(me.vec_ntn); vec_soff = std::move_if_noexcept(me.vec_soff); vec_eoff = std::move_if_noexcept(me.vec_eoff); onlyCopy(me); } #endif //prevent public access to some funcitons MatchEvaluator& setNumberedSubstringVector(VecNum* v){ RegexMatch::setNumberedSubstringVector(v); return *this; } MatchEvaluator& setNamedSubstringVector(VecNas* v){ RegexMatch::setNamedSubstringVector(v); return *this; } MatchEvaluator& setNameToNumberMapVector(VecNtN* v){ RegexMatch::setNameToNumberMapVector(v); return *this; } MatchEvaluator& setMatchStartOffsetVector(VecOff* v){ RegexMatch::setMatchStartOffsetVector(v); return *this; } MatchEvaluator& setMatchEndOffsetVector(VecOff* v){ RegexMatch::setMatchEndOffsetVector(v); return *this; } public: ///Default constructor. ///Sets callback::erase as the callback function. ///Removes matched part/s from the subject string if the callback is not ///changed. /// ```cpp /// jp::Regex re("\s*string"); /// jp::MatchEvaluator me; /// std::cout<< /// me.setRegexObject(&re); /// .setSubject("I am a string"); /// .nreplace(); /// //The above will delete ' string' from the subject /// //thus the result will be 'I am a' /// ``` explicit MatchEvaluator():RegexMatch(){ init(); } ///@overload ///... ///Constructor taking a Regex object pointer. ///It sets the associated Regex object and ///initializes the MatchEvaluator object with ///callback::erase callback function. ///Underlying data is not modified. ///@param r constant Regex pointer. explicit MatchEvaluator(Regex const *r):RegexMatch(r){ init(); } ///@overload ///... ///Constructor taking a callback function. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. ///@param mef Callback function. explicit MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ init(); setCallback(mef); } ///@overload /// ... ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. ///@param mef Callback function. explicit MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ init(); setCallback(mef); } ///@overload /// ... ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. ///@param mef Callback function. explicit MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ init(); setCallback(mef); } ///@overload /// ... ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. ///@param mef Callback function. explicit MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ init(); setCallback(mef); } ///@overload /// ... ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. ///@param mef Callback function. explicit MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ init(); setCallback(mef); } ///@overload /// ... ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. ///@param mef Callback function. explicit MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ init(); setCallback(mef); } ///@overload /// ... ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. ///@param mef Callback function. explicit MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ init(); setCallback(mef); } ///@overload /// ... ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. ///@param mef Callback function. explicit MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ init(); setCallback(mef); } ///@overload /// ... ///Copy constructor. ///@param me Reference to MatchEvaluator object MatchEvaluator(MatchEvaluator const &me): RegexMatch(me){ init(); deepCopy(me); } ///Overloaded copy-assignment operator ///@param me MatchEvaluator object ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& operator=(MatchEvaluator const &me){ if(this == &me) return *this; RegexMatch::operator=(me); deepCopy(me); return *this; } #ifdef JPCRE2_USE_MINIMUM_CXX_11 ///@overload /// ... ///Move constructor. ///This constructor steals resources from the argument. ///It leaves the argument in a valid but indeterminate sate. ///The indeterminate state can be returned to normal by calling reset() on that object. ///@param me rvalue reference to a MatchEvaluator object MatchEvaluator(MatchEvaluator&& me): RegexMatch(me){ init(); deepMove(me); } ///@overload ///... ///Overloaded move-assignment operator. ///It steals resources from the argument. ///It leaves the argument in a valid but indeterminate sate. ///The indeterminate state can be returned to normal by calling reset() on that object. ///@param me rvalue reference to a MatchEvaluator object ///@return A reference to the calling MatchEvaluator object. ///@see MatchEvaluator(MatchEvaluator&& me) MatchEvaluator& operator=(MatchEvaluator&& me){ if(this == &me) return *this; RegexMatch::operator=(me); deepMove(me); return *this; } #endif virtual ~MatchEvaluator(){} ///Member function to set a callback function with no vector reference. ///Callback function is always overwritten. The implemented vectors are set to be filled with match data. ///Other vectors that were set previously, are not unset and thus they will be filled with match data too ///when `match()` or `nreplace()` is called. ///@param mef Callback function. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ callback0 = mef; callbackn = 0; return *this; } ///@overload /// ... ///Sets a callback function with a jp::NumSub vector. ///You will be working with a reference to the constant vector. ///@param mef Callback function. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ callback1 = mef; callbackn = 1; setNumberedSubstringVector(&vec_num); return *this; } ///@overload /// ... ///Sets a callback function with a jp::NumSub and jp::MapNas. ///You will be working with references of the constant vectors. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: ///```cpp ///map_nas["word"]; //wrong ///map_nas.at("word"); //ok ///``` ///If you want to use `[]` operator with maps, make a copy: ///```cpp ///jp::MapNas mn = map_nas; ///mn["word"]; //ok ///``` ///@param mef Callback function. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ callback3 = mef; callbackn = 3; setNumberedSubstringVector(&vec_num); setNamedSubstringVector(&vec_nas); return *this; } ///@overload /// ... ///Sets a callback function with a jp::NumSub and jp::MapNtN. ///You will be working with references of the constant vectors. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: ///```cpp ///map_ntn["word"]; //wrong ///map_ntn.at("word"); //ok ///``` ///If you want to use `[]` operator with maps, make a copy: ///```cpp ///jp::MapNtN mn = map_ntn; ///mn["word"]; //ok ///``` ///@param mef Callback function. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ callback5 = mef; callbackn = 5; setNumberedSubstringVector(&vec_num); setNameToNumberMapVector(&vec_ntn); return *this; } ///@overload /// ... ///Sets a callback function with a jp::NumSub, jp::MapNas, jp::MapNtN. ///You will be working with references of the constant vectors. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: ///```cpp ///map_nas["word"]; //wrong ///map_nas.at("word"); //ok ///``` ///If you want to use `[]` operator with maps, make a copy: ///```cpp ///jp::MapNas mn = map_nas; ///mn["word"]; //ok ///``` ///@param mef Callback function. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ callback7 = mef; callbackn = 7; setNumberedSubstringVector(&vec_num); setNamedSubstringVector(&vec_nas); setNameToNumberMapVector(&vec_ntn); return *this; } ///@overload /// ... ///Sets a callback function with a jp::MapNas. ///You will be working with reference of the constant vector. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: ///```cpp ///map_nas["word"]; //wrong ///map_nas.at("word"); //ok ///``` ///If you want to use `[]` operator with maps, make a copy: ///```cpp ///jp::MapNas mn = map_nas; ///mn["word"]; //ok ///``` ///@param mef Callback function. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ callback2 = mef; callbackn = 2; setNamedSubstringVector(&vec_nas); return *this; } ///@overload /// ... ///Sets a callback function with a jp::MapNas, jp::MapNtN. ///You will be working with reference of the constant vector. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: ///```cpp ///map_nas["word"]; //wrong ///map_nas.at("word"); //ok ///``` ///If you want to use `[]` operator with maps, make a copy: ///```cpp ///jp::MapNas mn = map_nas; ///mn["word"]; //ok ///``` ///@param mef Callback function. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ callback6 = mef; callbackn = 6; setNamedSubstringVector(&vec_nas); setNameToNumberMapVector(&vec_ntn); return *this; } ///@overload /// ... ///Sets a callback function with a jp::MapNtN. ///You will be working with references of the constant vectors. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: ///```cpp ///map_ntn["word"]; //wrong ///map_ntn.at("word"); //ok ///``` ///If you want to use `[]` operator with maps, make a copy: ///```cpp ///jp::MapNtN mn = map_ntn; ///mn["word"]; //ok ///``` ///@param mef Callback function. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ callback4 = mef; callbackn = 4; setNameToNumberMapVector(&vec_ntn); return *this; } ///Clear match data. ///It clears all match data from all vectors (without shrinking). ///For shrinking the vectors, use `resetMatchData()` ///A call to `match()` or nreplace() will be required to produce match data again. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& clearMatchData(){ vec_num.clear(); vec_nas.clear(); vec_ntn.clear(); vec_soff.clear(); vec_eoff.clear(); return *this; } ///Reset match data to initial state. ///It deletes all match data from all vectors shrinking their capacity. ///A call to `match()` or nreplace() will be required to produce match data again. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& resetMatchData(){ VecNum().swap(vec_num); VecNas().swap(vec_nas); VecNtN().swap(vec_ntn); VecOff().swap(vec_soff); VecOff().swap(vec_eoff); return *this; } ///Reset MatchEvaluator to initial state including memory. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& reset(){ RegexMatch::reset(); resetMatchData(); init(); return *this; } ///Clears MatchEvaluator. ///Returns everything to initial state (some memory may retain for further and faster use). ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& clear(){ RegexMatch::clear(); clearMatchData(); init(); return *this; } ///Call RegexMatch::resetErrors(). ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& resetErrors(){ RegexMatch::resetErrors(); return *this; } ///Call RegexMatch::setRegexObject(r). ///@param r constant Regex object pointer ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setRegexObject (Regex const *r){ RegexMatch::setRegexObject(r); return *this; } ///Call RegexMatch::setSubject(String const &s). ///@param s subject string. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setSubject (String const &s){ RegexMatch::setSubject(s); return *this; } ///@overload ///@param s constant subject string by pointer ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setSubject (String const *s){ RegexMatch::setSubject(s); return *this; } ///Call RegexMatch::setModifier(Modifier const& s). ///@param s modifier string. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setModifier (Modifier const& s){ RegexMatch::setModifier(s); return *this; } ///Call RegexMatch::setModifierTable(ModifierTable const * s). ///@param mdt pointer to ModifierTable object. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setModifierTable (ModifierTable const * mdt){ RegexMatch::setModifierTable(mdt); return *this; } ///Call RegexMatch::setJpcre2Option(Uint x). ///@param x JPCRE2 option value. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setJpcre2Option (Uint x){ RegexMatch::setJpcre2Option(x); return *this; } ///Call RegexMatch::setPcre2Option (Uint x). ///@param x PCRE2 option value. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setPcre2Option (Uint x){ RegexMatch::setPcre2Option(x); return *this; } ///Call RegexMatch::setFindAll(bool x). ///@param x true if global match, false otherwise. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setFindAll (bool x){ RegexMatch::setFindAll(x); return *this; } ///Call RegexMatch::setFindAll(). ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setFindAll(){ RegexMatch::setFindAll(); return *this; } ///Call RegexMatch::setStartOffset (PCRE2_SIZE offset). ///@param offset match start offset in the subject. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setStartOffset (PCRE2_SIZE offset){ RegexMatch::setStartOffset(offset); return *this; } ///Call RegexMatch::setMatchContext(MatchContext *match_context). ///@param match_context pointer to match context. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setMatchContext (MatchContext *match_context){ RegexMatch::setMatchContext(match_context); return *this; } ///Call RegexMatch::setMatchDataBlock(MatchContext * mdt); ///@param mdt pointer to match data block ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setMatchDataBlock(MatchData* mdt){ RegexMatch::setMatchDataBlock(mdt); return *this; } ///Set the buffer size that will be used by pcre2_substitute (replace()). ///If buffer size proves to be enough to fit the resultant string ///from each match (not the total resultant string), it will yield one less call ///to pcre2_substitute for each match. ///@param x buffer size. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& setBufferSize(PCRE2_SIZE x){ buffer_size = x; return *this; } ///Get the initial buffer size that is being used by internal function pcre2_substitute ///@return buffer_size PCRE2_SIZE getBufferSize(){ return buffer_size; } ///Call RegexMatch::changeModifier(Modifier const& mod, bool x). ///@param mod modifier string. ///@param x true (add) or false (remove). ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& changeModifier (Modifier const& mod, bool x){ RegexMatch::changeModifier(mod, x); return *this; } ///Call RegexMatch::changeJpcre2Option(Uint opt, bool x). ///@param opt JPCRE2 option ///@param x true (add) or false (remove). ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& changeJpcre2Option (Uint opt, bool x){ RegexMatch::changeJpcre2Option(opt, x); return *this; } ///Call RegexMatch::changePcre2Option(Uint opt, bool x). ///@param opt PCRE2 option. ///@param x true (add) or false (remove). ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& changePcre2Option (Uint opt, bool x){ RegexMatch::changePcre2Option(opt, x); return *this; } ///Call RegexMatch::addModifier(Modifier const& mod). ///@param mod modifier string. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& addModifier (Modifier const& mod){ RegexMatch::addModifier(mod); return *this; } ///Call RegexMatch::addJpcre2Option(Uint x). ///@param x JPCRE2 option. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& addJpcre2Option (Uint x){ RegexMatch::addJpcre2Option(x); return *this; } ///Call RegexMatch::addPcre2Option(Uint x). ///@param x PCRE2 option. ///@return A reference to the calling MatchEvaluator object. MatchEvaluator& addPcre2Option (Uint x){ RegexMatch::addPcre2Option(x); return *this; } ///Perform match and return the match count. ///This function strips off matching options (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT) that are considered ///bad options for replacement operation and then calls ///RegexMatch::match() to perform the match. ///@return match count. SIZE_T match(void){ //remove bad matching options RegexMatch::changePcre2Option(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT, false); return RegexMatch::match(); } ///Perform regex replace with this match evaluator. ///This is a JPCRE2 native replace function (thus the name nreplace). ///It uses the `MatchEvaluatorCallback` function that was set with a constructor or `MatchEvaluator::setCallback()` function ///to generate the replacement strings on the fly. ///The string returned by the callback function will be treated as literal and will ///not go through any further processing. /// ///This function performs a new match everytime it is called unless it is passed with a boolean `false` as the first argument. ///To use existing match data that was created by a previous `MatchEvaluator::nreplace()` or `MatchEvaluator::match()`, call this ///function with boolean `false` as the first argument. /// ///## Complexity /// 1. Changes in replace related option takes effect without a re-match. /// 2. Changes in match related option (e.g start offset) needs a re-match to take effect. /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error. /// 4. If the associated Regex object or subject string changes, a new match must be performed, /// trying to use the existing match data in such cases is undefined behavior. /// ///@param do_match Perform a new matching operation if true, otherwise use existing match data. ///@param jro JPCRE2 replace options. ///@param counter Pointer to a counter to store the number of replacement done. ///@return resultant string after replace. ///@see MatchEvaluator. ///@see MatchEvaluatorCallback. String nreplace(bool do_match=true, Uint jro=0, SIZE_T* counter=0); ///PCRE2 compatible replace function that uses this MatchEvaluator. ///Performs regex replace with pcre2_substitute function ///by generating the replacement strings dynamically with MatchEvaluator callback. ///The string returned by callback function is processed by internal pcre2_substitute, thus allowing ///all options that are provided by PCRE2 itself. /// ///This function performs a new match everytime it is called unless it is passed with a boolean `false` as the first argument. /// ///## Complexity /// 1. Changes in replace related option takes effect without a re-match. /// 2. Changes in match related option (e.g start offset) needs a re-match to take effect. /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error. /// 4. If the associated Regex object or subject string changes, a new match must be performed, /// trying to use the existing match data in such cases is undefined behavior. /// ///@param do_match perform a new match if true, otherwise use existing data. ///@param ro replace related PCRE2 options. ///@param counter Pointer to a counter to store the number of replacement done. ///@return resultant string after replacement. String replace(bool do_match=true, Uint ro=0, SIZE_T* counter=0); }; /** Provides public constructors to create RegexReplace objects. * Every RegexReplace object should be associated with a Regex object. * This class stores a pointer to its' associated Regex object, thus when * the content of the associated Regex object is changed, there's no need to * set the pointer again. * * Examples: * * ```cpp * jp::Regex re; * jp::RegexReplace rr; * rr.setRegexObject(&re); * rr.replace("subject", "me"); // returns 'subject' * re.compile("\\w+"); * rr.replace(); // replaces 'subject' with 'me' i.e returns 'me' * ``` */ class RegexReplace { private: friend class Regex; Regex const *re; String r_subject; String *r_subject_ptr; //preplace method modifies it in-place String r_replw; String const *r_replw_ptr; Uint replace_opts; Uint jpcre2_replace_opts; PCRE2_SIZE buffer_size; PCRE2_SIZE _start_offset; MatchData *mdata; MatchContext *mcontext; ModifierTable const * modtab; SIZE_T last_replace_count; SIZE_T* last_replace_counter; void init_vars() { re = 0; r_subject_ptr = &r_subject; r_replw_ptr = &r_replw; replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; jpcre2_replace_opts = 0; buffer_size = 0; error_number = 0; error_offset = 0; _start_offset = 0; mdata = 0; mcontext = 0; modtab = 0; last_replace_count = 0; last_replace_counter = &last_replace_count; } void onlyCopy(RegexReplace const &rr){ re = rr.re; //only pointer should be copied. //rr.r_subject_ptr may point to rr.r_subject or other user data r_subject_ptr = (rr.r_subject_ptr == &rr.r_subject) ? &r_subject //not rr.r_subject : rr.r_subject_ptr; //other user data r_replw = rr.r_replw; //rr.r_replw_ptr may point to rr.r_replw or other user data r_replw_ptr = (rr.r_replw_ptr == &rr.r_replw) ? &r_replw //not rr.r_replw : rr.r_replw_ptr; //other user data replace_opts = rr.replace_opts; jpcre2_replace_opts = rr.jpcre2_replace_opts; buffer_size = rr.buffer_size; error_number = rr.error_number; error_offset = rr.error_offset; _start_offset = rr._start_offset; mdata = rr.mdata; mcontext = rr.mcontext; modtab = rr.modtab; last_replace_count = rr.last_replace_count; last_replace_counter = (rr.last_replace_counter == &rr.last_replace_count) ? &last_replace_count : rr.last_replace_counter; } void deepCopy(RegexReplace const &rr){ r_subject = rr.r_subject; onlyCopy(rr); } #ifdef JPCRE2_USE_MINIMUM_CXX_11 void deepMove(RegexReplace& rr){ r_subject = std::move_if_noexcept(rr.r_subject); onlyCopy(rr); } #endif protected: int error_number; PCRE2_SIZE error_offset; public: ///Default constructor RegexReplace(){ init_vars(); } ///@overload /// ... ///Creates a RegexReplace object associating a Regex object. ///Regex object is not modified. ///@param r pointer to a Regex object RegexReplace(Regex const *r) { init_vars(); re = r; } ///@overload ///... ///Copy constructor. ///@param rr RegexReplace object reference RegexReplace(RegexReplace const &rr){ init_vars(); deepCopy(rr); } ///Overloaded Copy assignment operator. ///@param rr RegexReplace object reference ///@return A reference to the calling RegexReplace object RegexReplace& operator=(RegexReplace const &rr){ if(this == &rr) return *this; deepCopy(rr); return *this; } #ifdef JPCRE2_USE_MINIMUM_CXX_11 ///@overload ///... ///Move constructor. ///This constructor steals resources from the argument. ///It leaves the argument in a valid but indeterminate sate. ///The indeterminate state can be returned to normal by calling reset() on that object. ///@param rr rvalue reference to a RegexReplace object reference RegexReplace(RegexReplace&& rr){ init_vars(); deepMove(rr); } ///@overload ///... ///Overloaded move assignment operator. ///This constructor steals resources from the argument. ///It leaves the argument in a valid but indeterminate sate. ///The indeterminate state can be returned to normal by calling reset() on that object. ///@param rr rvalue reference to a RegexReplace object reference ///@return A reference to the calling RegexReplace object RegexReplace& operator=(RegexReplace&& rr){ if(this == &rr) return *this; deepMove(rr); return *this; } #endif virtual ~RegexReplace() {} ///Reset all class variables to its default (initial) state including memory. ///@return Reference to the calling RegexReplace object. RegexReplace& reset() { String().swap(r_subject); String().swap(r_replw); init_vars(); return *this; } ///Clear all class variables to its default (initial) state (some memory may retain for further use). ///@return Reference to the calling RegexReplace object. RegexReplace& clear() { r_subject.clear(); r_replw.clear(); init_vars(); return *this; } ///Reset replace related errors to zero. ///@return Reference to the calling RegexReplace object ///@see Regex::resetErrors() ///@see RegexMatch::resetErrors() RegexReplace& resetErrors(){ error_number = 0; error_offset = 0; return *this; } /// Returns the last error number ///@return Last error number int getErrorNumber() const { return error_number; } /// Returns the last error offset ///@return Last error offset int getErrorOffset() const { return (int)error_offset; } /// Returns the last error message ///@return Last error message String getErrorMessage() const { #ifdef JPCRE2_USE_MINIMUM_CXX_11 return select::getErrorMessage(error_number, error_offset); #else return select::getErrorMessage(error_number, error_offset); #endif } /// Get replacement string ///@return replacement string String getReplaceWith() const { return *r_replw_ptr; } /// Get pointer to replacement string ///@return pointer to replacement string String const * getReplaceWithPointer() const { return r_replw_ptr; } /// Get subject string ///@return subject string ///@see RegexMatch::getSubject() String getSubject() const { return *r_subject_ptr; } /// Get pointer to subject string ///@return Pointer to constant subject string ///@see RegexMatch::getSubjectPointer() String const * getSubjectPointer() const { return r_subject_ptr; } /// Calculate modifier string from PCRE2 and JPCRE2 options and return it. /// /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized /// as long as you don't do that explicitly. Calling RegexReplace::setModifier() will re-set them. /// /// **Mixed or combined modifier**. /// /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options /// get set, and when you remove the 'n' modifier (with `RegexReplace::changeModifier()`), both will get removed. /// @return Calculated modifier string (std::string) ///@see RegexMatch::getModifier() ///@see Regex::getModifier() std::string getModifier() const { return modtab ? modtab->fromReplaceOption(replace_opts, jpcre2_replace_opts) : MOD::fromReplaceOption(replace_opts, jpcre2_replace_opts); } ///Get the modifier table that is set, ///@return constant ModifierTable pointer. ModifierTable const* getModifierTable(){ return modtab; } ///Get start offset. ///@return the start offset where matching starts for replace operation PCRE2_SIZE getStartOffset() const { return _start_offset; } /// Get PCRE2 option ///@return PCRE2 option for replace ///@see Regex::getPcre2Option() ///@see RegexMatch::getPcre2Option() Uint getPcre2Option() const { return replace_opts; } /// Get JPCRE2 option ///@return JPCRE2 option for replace ///@see Regex::getJpcre2Option() ///@see RegexMatch::getJpcre2Option() Uint getJpcre2Option() const { return jpcre2_replace_opts; } ///Get a pointer to the associated Regex object. ///If no actual Regex object is associated, null is returned ///@return A pointer to the associated constant Regex object or null Regex const * getRegexObject() const { return re; } ///Return pointer to the match context that was previously set with setMatchContext(). ///Handling memory is the callers' responsibility. ///@return pointer to the match context (default: null). MatchContext* getMatchContext(){ return mcontext; } ///Get the pointer to the match data block that was set previously with setMatchData() ///Handling memory is the callers' responsibility. ///@return pointer to the match data (default: null). virtual MatchData* getMatchDataBlock(){ return mdata; } ///Get the initial buffer size that is being used by internal function pcre2_substitute ///@return buffer_size PCRE2_SIZE getBufferSize(){ return buffer_size; } ///Get the number of replacement in last replace operation. ///If you set an external counter with RegexReplace::setReplaceCounter(), ///a call to this getter method will dereference the pointer to the external counter ///and return the value. ///@return Last replace count SIZE_T getLastReplaceCount(){ return *last_replace_counter; } ///Set an external counter variable to store the replacement count. ///This counter will be updated after each replacement operation on this object. ///A call to this method will reset the internal counter to 0, thus when you reset the counter ///to internal counter (by giving null as param), the previous replace count won't be available. ///@param counter Pointer to a counter variable. Null sets the counter to default internal counter. ///@return Reference to the calling RegexReplace object. RegexReplace& setReplaceCounter(SIZE_T* counter){ last_replace_count = 0; last_replace_counter = counter ? counter : &last_replace_count; return *this; } ///Set the associated Regex object. ///Regex object is not modified. ///@param r Pointer to a Regex object. ///@return Reference to the calling RegexReplace object. RegexReplace& setRegexObject(Regex const *r){ re = r; return *this; } /// Set the subject string for replace. ///This makes a copy of the string. If no copy is desired or you are working ///with big text, consider passing by pointer. ///@param s Subject string ///@return Reference to the calling RegexReplace object ///@see RegexMatch::setSubject() RegexReplace& setSubject(String const &s) { r_subject = s; r_subject_ptr = &r_subject; //must overwrite return *this; } ///@overload ///... /// Set pointer to the subject string for replace, null pointer unsets it. /// The underlined data is not modified unless RegexReplace::preplace() method is used. ///@param s Pointer to subject string ///@return Reference to the calling RegexReplace object ///@see RegexMatch::setSubject() RegexReplace& setSubject(String *s) { if(s) r_subject_ptr = s; else { r_subject.clear(); r_subject_ptr = &r_subject; } return *this; } /// Set the replacement string. ///`$` is a special character which implies captured group. /// ///1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number. ///2. A named substring can be referenced with `${name}`, where 'name' is the group name. ///3. A literal `$` can be given as `$$`. /// ///**Note:** This function makes a copy of the string. If no copy is desired or ///you are working with big text, consider passing the string with pointer. /// ///@param s String to replace with ///@return Reference to the calling RegexReplace object RegexReplace& setReplaceWith(String const &s) { r_replw = s; r_replw_ptr = &r_replw; //must overwrite return *this; } ///@overload ///... ///@param s Pointer to the string to replace with, null pointer unsets it. ///@return Reference to the calling RegexReplace object RegexReplace& setReplaceWith(String const *s) { if(s) r_replw_ptr = s; else { r_replw.clear(); r_replw_ptr = &r_replw; } return *this; } /// Set the modifier string (resets all JPCRE2 and PCRE2 options) by calling RegexReplace::changeModifier(). ///@param s Modifier string. ///@return Reference to the calling RegexReplace object ///@see RegexMatch::setModifier() ///@see Regex::setModifier() RegexReplace& setModifier(Modifier const& s) { replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* must not be initialized to 0 */ jpcre2_replace_opts = 0; return changeModifier(s, true); } ///Set a custom modifier table to be used. ///@param mdt pointer to ModifierTable object. /// @return Reference to the calling RegexReplace object. RegexReplace& setModifierTable(ModifierTable const * mdt){ modtab = mdt; return *this; } /// Set the initial buffer size to be allocated for replaced string (used by PCRE2) ///@param x Buffer size ///@return Reference to the calling RegexReplace object RegexReplace& setBufferSize(PCRE2_SIZE x) { buffer_size = x; return *this; } ///Set start offset. ///Set the offset where matching starts for replace operation ///@param start_offset The offset where matching starts for replace operation ///@return Reference to the calling RegexReplace object RegexReplace& setStartOffset(PCRE2_SIZE start_offset){ _start_offset = start_offset; return *this; } /// Set JPCRE2 option for replace (overwrite existing option) ///@param x Option value ///@return Reference to the calling RegexReplace object ///@see RegexMatch::setJpcre2Option() ///@see Regex::setJpcre2Option() RegexReplace& setJpcre2Option(Uint x) { jpcre2_replace_opts = x; return *this; } /// Set PCRE2 option replace (overwrite existing option) ///@param x Option value ///@return Reference to the calling RegexReplace object ///@see RegexMatch::setPcre2Option() ///@see Regex::setPcre2Option() RegexReplace& setPcre2Option(Uint x) { replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | x; return *this; } ///Set the match context to be used. ///Native PCRE2 API may be used to create match context. ///The memory of the match context is not handled by RegexReplace object and not freed. ///User will be responsible for freeing memory. ///@param match_context Pointer to match context. ///@return Reference to the calling RegexReplace object. RegexReplace& setMatchContext(MatchContext * match_context){ mcontext = match_context; return *this; } ///Set the match data block to be used. ///Native PCRE2 API may be used to create match data block. ///The memory of the match data is not handled by RegexReplace object and not freed. ///User will be responsible for creating/freeing memory. ///@param match_data Pointer to match data. ///@return Reference to the calling RegexReplace object. RegexReplace& setMatchDataBlock(MatchData *match_data){ mdata = match_data; return *this; } /// After a call to this function PCRE2 and JPCRE2 options will be properly set. /// This function does not initialize or re-initialize options. /// If you want to set options from scratch, initialize them to 0 before calling this function. /// /// If invalid modifier is detected, then the error number for the RegexReplace /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character. /// You can get the message with RegexReplace::getErrorMessage() function. /// @param mod Modifier string. /// @param x Whether to add or remove option /// @return Reference to the RegexReplace object /// @see Regex::changeModifier() /// @see RegexMatch::changeModifier() RegexReplace& changeModifier(Modifier const& mod, bool x){ modtab ? modtab->toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset) : MOD::toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset); return *this; } /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options. /// Add or remove a JPCRE2 option /// @param opt JPCRE2 option value /// @param x Add the option if it's true, remove otherwise. /// @return Reference to the calling RegexReplace object /// @see RegexMatch::changeJpcre2Option() /// @see Regex::changeJpcre2Option() RegexReplace& changeJpcre2Option(Uint opt, bool x) { jpcre2_replace_opts = x ? jpcre2_replace_opts | opt : jpcre2_replace_opts & ~opt; return *this; } /// Add or remove a PCRE2 option /// @param opt PCRE2 option value /// @param x Add the option if it's true, remove otherwise. /// @return Reference to the calling RegexReplace object /// @see RegexMatch::changePcre2Option() /// @see Regex::changePcre2Option() RegexReplace& changePcre2Option(Uint opt, bool x) { replace_opts = x ? replace_opts | opt : replace_opts & ~opt; //replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* It's important, but let user override it. */ return *this; } /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options. /// This is just a wrapper of the original function RegexReplace::changeModifier() /// provided for convenience. /// @param mod Modifier string. /// @return Reference to the calling RegexReplace object /// @see RegexMatch::addModifier() /// @see Regex::addModifier() RegexReplace& addModifier(Modifier const& mod){ return changeModifier(mod, true); } /// Add specified JPCRE2 option to existing options for replace. ///@param x Option value ///@return Reference to the calling RegexReplace object ///@see RegexMatch::addJpcre2Option() ///@see Regex::addJpcre2Option() RegexReplace& addJpcre2Option(Uint x) { jpcre2_replace_opts |= x; return *this; } /// Add specified PCRE2 option to existing options for replace ///@param x Option value ///@return Reference to the calling RegexReplace object ///@see RegexMatch::addPcre2Option() ///@see Regex::addPcre2Option() RegexReplace& addPcre2Option(Uint x) { replace_opts |= x; return *this; } /// Perform regex replace by retrieving subject string, replacement string, modifier and other options from class variables. /// In the replacement string (see RegexReplace::setReplaceWith()) `$` is a special character which implies captured group. /// 1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number. /// 2. A named substring can be referenced with `${name}`, where 'name' is the group name. /// 3. A literal `$` can be given as `$$`. /// 4. Bash like features: ${:-} and ${:+:}, where is a group number or name. /// ///All options supported by pcre2_substitute is available. /// /// Note: This function calls pcre2_substitute() to do the replacement. ///@return Replaced string String replace(void); /// Perl compatible replace method. /// Modifies subject string in-place and returns replace count. /// /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`. /// @return replace count SIZE_T preplace(void){ *r_subject_ptr = replace(); return *last_replace_counter; } /// Perl compatible replace method with match evaluator. /// Modifies subject string in-place and returns replace count. /// MatchEvaluator class does not have a implementation of this replace method, thus it is not possible /// to re-use match data with preplace() method. /// Re-using match data with preplace doesn't actually make any sense, because new subject will /// always require new match data. /// /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`. /// @param me MatchEvaluator object. /// @return replace count SIZE_T preplace(MatchEvaluator me){ *r_subject_ptr = me.setRegexObject(getRegexObject()) .setSubject(r_subject_ptr) //do not use method .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0) .setMatchContext(getMatchContext()) .setMatchDataBlock(getMatchDataBlock()) .setBufferSize(getBufferSize()) .setStartOffset(getStartOffset()) .replace(true, getPcre2Option(), last_replace_counter); return *last_replace_counter; } ///JPCRE2 native replace function. ///A different name is adopted to ///distinguish itself from the regular replace() function which ///uses pcre2_substitute() to do the replacement; contrary to that, ///it will provide a JPCRE2 native way of replacement operation. ///It takes a MatchEvaluator object which provides a callback function that is used ///to generate replacement string on the fly. Any replacement string set with ///`RegexReplace::setReplaceWith()` function will have no effect. ///The string returned by the callback function will be treated as literal and will ///not go through any further processing. /// ///This function works on a copy of the MatchEvaluator, and thus makes no changes ///to the original. The copy is modified as below: /// ///1. Global replacement will set FIND_ALL for match, unset otherwise. ///2. Bad matching options such as `PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT` will be removed. ///3. subject, start_offset and Regex object will change according to the RegexReplace object. ///4. match context, and match data block will be changed according to the RegexReplace object. /// ///It calls MatchEvaluator::nreplace() on the MatchEvaluator object to perform the replacement. /// ///It always performs a new match. ///@param me A MatchEvaluator object. ///@return The resultant string after replacement. ///@see MatchEvaluator::nreplace() ///@see MatchEvaluator ///@see MatchEvaluatorCallback String nreplace(MatchEvaluator me){ return me.setRegexObject(getRegexObject()) .setSubject(getSubjectPointer()) .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0) .setMatchContext(getMatchContext()) .setMatchDataBlock(getMatchDataBlock()) .setStartOffset(getStartOffset()) .nreplace(true, getJpcre2Option(), last_replace_counter); } ///PCRE2 compatible replace function that takes a MatchEvaluator. ///String returned by callback function is processed by pcre2_substitute, ///thus all PCRE2 substitute options are supported by this replace function. /// ///It always performs a new match. ///@param me MatchEvaluator instance, (copied and modified according to this object). ///@return resultant string. ///@see replace() String replace(MatchEvaluator me){ return me.setRegexObject(getRegexObject()) .setSubject(getSubjectPointer()) .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0) .setMatchContext(getMatchContext()) .setMatchDataBlock(getMatchDataBlock()) .setBufferSize(getBufferSize()) .setStartOffset(getStartOffset()) .replace(true, getPcre2Option(), last_replace_counter); } }; /** Provides public constructors to create Regex object. * Each regex pattern needs an object of this class and each pattern needs to be compiled. * Pattern compilation can be done using one of its' overloaded constructors or the `Regex::compile()` * member function. * * Examples: * * ```cpp * jp::Regex re; //does not perform a compile * re.compile("pattern", "modifier"); * jp::Regex re2("pattern", "modifier"); //performs a compile * ``` * */ class Regex { private: friend class RegexMatch; friend class RegexReplace; friend class MatchEvaluator; String pat_str; String const *pat_str_ptr; Pcre2Code *code; Uint compile_opts; Uint jpcre2_compile_opts; ModifierTable const * modtab; CompileContext *ccontext; std::vector tabv; void init_vars() { jpcre2_compile_opts = 0; compile_opts = 0; error_number = 0; error_offset = 0; code = 0; pat_str_ptr = &pat_str; ccontext = 0; modtab = 0; } void freeRegexMemory(void) { Pcre2Func::code_free(code); code = 0; //we may use it again } void freeCompileContext(){ Pcre2Func::compile_context_free(ccontext); ccontext = 0; } void onlyCopy(Regex const &r){ //r.pat_str_ptr may point to other user data pat_str_ptr = (r.pat_str_ptr == &r.pat_str) ? &pat_str //not r.pat_str : r.pat_str_ptr; //other user data compile_opts = r.compile_opts; jpcre2_compile_opts = r.jpcre2_compile_opts; error_number = r.error_number; error_offset = r.error_offset; modtab = r.modtab; } void deepCopy(Regex const &r) { pat_str = r.pat_str; //must not use setPattern() here onlyCopy(r); //copy tables tabv = r.tabv; //copy ccontext if it's not null freeCompileContext(); ccontext = (r.ccontext) ? Pcre2Func::compile_context_copy(r.ccontext) : 0; //if tabv is not empty and ccontext is ok (not null) set the table pointer to ccontext if(ccontext && !tabv.empty()) Pcre2Func::set_character_tables(ccontext, &tabv[0]); //table pointer must be updated in the compiled code itself, jit memory copy is not available. //copy is not going to work, we need a recompile. //as all vars are already copied, we can just call compile() r.code ? compile() //compile frees previous memory. : freeRegexMemory(); } #ifdef JPCRE2_USE_MINIMUM_CXX_11 void deepMove(Regex& r) { pat_str = std::move_if_noexcept(r.pat_str); onlyCopy(r); //steal tables tabv = std::move_if_noexcept(r.tabv); //steal ccontext freeCompileContext(); ccontext = r.ccontext; r.ccontext = 0; //must set this to 0 if(ccontext && !tabv.empty()) Pcre2Func::set_character_tables(ccontext, &tabv[0]); //steal the code freeRegexMemory(); code = r.code; r.code = 0; //must set this to 0 } #endif protected: int error_number; PCRE2_SIZE error_offset; public: /// Default Constructor. /// Initializes all class variables to defaults. /// Does not perform any pattern compilation. Regex() { init_vars(); } ///Compile pattern with initialization. /// @param re Pattern string Regex(String const &re) { init_vars(); compile(re); } /// @overload /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. Regex(String const *re) { init_vars(); compile(re); } ///@overload /// @param re Pattern string . /// @param mod Modifier string. Regex(String const &re, Modifier const& mod) { init_vars(); compile(re, mod); } ///@overload /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. /// @param mod Modifier string. Regex(String const *re, Modifier const& mod) { init_vars(); compile(re, mod); } ///@overload /// @param re Pattern string . /// @param po PCRE2 option value Regex(String const &re, Uint po) { init_vars(); compile(re, po); } ///@overload /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. /// @param po PCRE2 option value Regex(String const *re, Uint po) { init_vars(); compile(re, po); } ///@overload /// @param re Pattern string . /// @param po PCRE2 option value /// @param jo JPCRE2 option value Regex(String const &re, Uint po, Uint jo) { init_vars(); compile(re, po, jo); } ///@overload /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. /// @param po PCRE2 option value /// @param jo JPCRE2 option value Regex(String const *re, Uint po, Uint jo) { init_vars(); compile(re, po, jo); } /// @overload ///... /// Copy constructor. /// A separate and new compile is performed from the copied options. /// /// @param r Constant Regex object reference. Regex(Regex const &r) { init_vars(); deepCopy(r); } /// Overloaded assignment operator. /// @param r Regex const & /// @return *this Regex& operator=(Regex const &r) { if (this == &r) return *this; deepCopy(r); return *this; } #ifdef JPCRE2_USE_MINIMUM_CXX_11 /// @overload ///... /// Move constructor. ///This constructor steals resources from the argument. ///It leaves the argument in a valid but indeterminate sate. ///The indeterminate state can be returned to normal by calling reset() on that object. /// @param r rvalue reference to a Regex object. Regex(Regex&& r) { init_vars(); deepMove(r); } ///@overload ///... /// Overloaded move-assignment operator. ///This constructor steals resources from the argument. ///It leaves the argument in a valid but indeterminate sate. ///The indeterminate state can be returned to normal by calling reset() on that object. /// @param r Regex&& /// @return *this Regex& operator=(Regex&& r) { if (this == &r) return *this; deepMove(r); return *this; } /// Provides boolean check for the status of the object. /// This overloaded boolean operator needs to be declared /// explicit to prevent implicit conversion and overloading issues. /// /// We will only enable it if >=C++11 is being used, as the explicit keyword /// for a function other than constructor is not supported in older compilers. /// /// If you are dealing with legacy code/compilers use the Double bang trick mentioned /// in Regex::operator!(). /// /// This helps us to check the status of the compiled regex like this: /// /// ``` /// jpcre2::select::Regex re("pat", "mod"); /// if(re) { /// std::cout<<"Compile success"; /// } else { /// std::cout<<"Compile failed"; /// } /// ``` ///@return true if regex compiled successfully, false otherwise. /// explicit operator bool() const { return (code != 0); } #endif /// Provides boolean check for the status of the object. /// This is a safe boolean approach (no implicit conversion or overloading). /// We don't need the explicit keyword here and thus it's the preferable method /// to check for object status that will work well with older compilers. /// e.g: /// /// ``` /// jpcre2::select::Regex re("pat","mod"); /// if(!re) { /// std::cout<<"Compile failed"; /// } else { /// std::cout<<"Compiled successfully"; /// } /// ``` /// Double bang trick: /// /// ``` /// jpcre2::select::Regex re("pat","mod"); /// if(!!re) { /// std::cout<<"Compiled successfully"; /// } else { /// std::cout<<"Compile failed"; /// } /// ``` /// @return true if regex compile failed, false otherwise. bool operator!() const { return (code == 0); } virtual ~Regex() { freeRegexMemory(); freeCompileContext(); } ///Reset all class variables to its default (initial) state including memory. ///@return Reference to the calling Regex object. Regex& reset() { freeRegexMemory(); freeCompileContext(); String().swap(pat_str); init_vars(); return *this; } ///Clear all class variables to its default (initial) state (some memory may retain for further use). ///@return Reference to the calling Regex object. Regex& clear() { freeRegexMemory(); freeCompileContext(); pat_str.clear(); init_vars(); return *this; } ///Reset regex compile related errors to zero. ///@return A reference to the Regex object ///@see RegexReplace::resetErrors() ///@see RegexMatch::resetErrors() Regex& resetErrors() { error_number = 0; error_offset = 0; return *this; } /// Recreate character tables used by PCRE2. /// You should call this function after changing the locale to remake the /// character tables according to the new locale. /// These character tables are used to compile the regex and used by match /// and replace operation. A separate call to compile() will be required /// to apply the new character tables. /// @return Reference to the calling Regex object. Regex& resetCharacterTables() { const unsigned char* tables = Pcre2Func::maketables(0); //must pass 0, we are using free() to free the tables. tabv = std::vector(tables, tables+1088); ::free((void*)tables); //must free memory if(!ccontext) ccontext = Pcre2Func::compile_context_create(0); Pcre2Func::set_character_tables(ccontext, &tabv[0]); return *this; } ///Get Pcre2 raw compiled code pointer. ///@return pointer to constant pcre2_code or null. Pcre2Code const* getPcre2Code() const{ return code; } /// Get pattern string ///@return pattern string of type jpcre2::select::String String getPattern() const { return *pat_str_ptr; } /// Get pointer to pattern string ///@return Pointer to constant pattern string String const * getPatternPointer() const { return pat_str_ptr; } ///Get number of captures from compiled code. ///@return New line option value or 0. Uint getNumCaptures() { if(!code) return 0; Uint numCaptures = 0; int ret = Pcre2Func::pattern_info(code, PCRE2_INFO_CAPTURECOUNT, &numCaptures); if(ret < 0) error_number = ret; return numCaptures; } /// Calculate modifier string from PCRE2 and JPCRE2 options and return it. /// /// **Mixed or combined modifier**. /// /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options /// get set, and when you remove the 'n' modifier (with `Regex::changeModifier()`), both will get removed. ///@tparam Char_T Character type ///@return Calculated modifier string (std::string) ///@see RegexMatch::getModifier() ///@see RegexReplace::getModifier() std::string getModifier() const { return modtab ? modtab->fromCompileOption(compile_opts, jpcre2_compile_opts) : MOD::fromCompileOption(compile_opts, jpcre2_compile_opts); } /// Get PCRE2 option /// @return Compile time PCRE2 option value ///@see RegexReplace::getPcre2Option() ///@see RegexMatch::getPcre2Option() Uint getPcre2Option() const { return compile_opts; } /// Get JPCRE2 option /// @return Compile time JPCRE2 option value ///@see RegexReplace::getJpcre2Option() ///@see RegexMatch::getJpcre2Option() Uint getJpcre2Option() const { return jpcre2_compile_opts; } /// Returns the last error number ///@return Last error number int getErrorNumber() const { return error_number; } /// Returns the last error offset ///@return Last error offset int getErrorOffset() const { return (int)error_offset; } /// Returns the last error message ///@return Last error message String getErrorMessage() const { #ifdef JPCRE2_USE_MINIMUM_CXX_11 return select::getErrorMessage(error_number, error_offset); #else return select::getErrorMessage(error_number, error_offset); #endif } ///Get new line convention from compiled code. ///@return New line option value or 0. ///``` ///PCRE2_NEWLINE_CR Carriage return only ///PCRE2_NEWLINE_LF Linefeed only ///PCRE2_NEWLINE_CRLF CR followed by LF only ///PCRE2_NEWLINE_ANYCRLF Any of the above ///PCRE2_NEWLINE_ANY Any Unicode newline sequence ///``` Uint getNewLine() { if(!code) return 0; Uint newline = 0; int ret = Pcre2Func::pattern_info(code, PCRE2_INFO_NEWLINE, &newline); if(ret < 0) error_number = ret; return newline; } ///Get the modifier table that is set, ///@return constant ModifierTable pointer. ModifierTable const* getModifierTable(){ return modtab; } ///Set new line convention. ///@param value New line option value. ///``` ///PCRE2_NEWLINE_CR Carriage return only ///PCRE2_NEWLINE_LF Linefeed only ///PCRE2_NEWLINE_CRLF CR followed by LF only ///PCRE2_NEWLINE_ANYCRLF Any of the above ///PCRE2_NEWLINE_ANY Any Unicode newline sequence ///``` ///@return Reference to the calling Regex object Regex& setNewLine(Uint value){ if(!ccontext) ccontext = Pcre2Func::compile_context_create(0); int ret = Pcre2Func::set_newline(ccontext, value); if(ret < 0) error_number = ret; return *this; } /// Set the pattern string to compile /// @param re Pattern string /// @return Reference to the calling Regex object. Regex& setPattern(String const &re) { pat_str = re; pat_str_ptr = &pat_str; //must overwrite return *this; } /// @overload /// @param re Pattern string pointer, null pointer will unset it. /// @return Reference to the calling Regex object. Regex& setPattern(String const *re) { if(re) pat_str_ptr = re; else { pat_str.clear(); pat_str_ptr = &pat_str; } return *this; } /// set the modifier (resets all JPCRE2 and PCRE2 options) by calling Regex::changeModifier(). /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier and sets /// equivalent PCRE2 and JPCRE2 options. /// @param x Modifier string. /// @return Reference to the calling Regex object. /// @see RegexMatch::setModifier() /// @see RegexReplace::setModifier() Regex& setModifier(Modifier const& x) { compile_opts = 0; jpcre2_compile_opts = 0; return changeModifier(x, true); } ///Set a custom modifier table to be used. ///@param mdt pointer to ModifierTable object. /// @return Reference to the calling Regex object. Regex& setModifierTable(ModifierTable const * mdt){ modtab = mdt; return *this; } /// Set JPCRE2 option for compile (overwrites existing option) /// @param x Option value /// @return Reference to the calling Regex object. /// @see RegexMatch::setJpcre2Option() /// @see RegexReplace::setJpcre2Option() Regex& setJpcre2Option(Uint x) { jpcre2_compile_opts = x; return *this; } /// Set PCRE2 option for compile (overwrites existing option) /// @param x Option value /// @return Reference to the calling Regex object. /// @see RegexMatch::setPcre2Option() /// @see RegexReplace::setPcre2Option() Regex& setPcre2Option(Uint x) { compile_opts = x; return *this; } /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options. /// This function does not initialize or re-initialize options. /// If you want to set options from scratch, initialize them to 0 before calling this function. /// /// If invalid modifier is detected, then the error number for the Regex /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character. /// You can get the message with Regex::getErrorMessage() function. /// @param mod Modifier string. /// @param x Whether to add or remove option /// @return Reference to the calling Regex object /// @see RegexMatch::changeModifier() /// @see RegexReplace::changeModifier() Regex& changeModifier(Modifier const& mod, bool x){ modtab ? modtab->toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset) : MOD::toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset); return *this; } /// Add or remove a JPCRE2 option /// @param opt JPCRE2 option value /// @param x Add the option if it's true, remove otherwise. /// @return Reference to the calling Regex object /// @see RegexMatch::changeJpcre2Option() /// @see RegexReplace::changeJpcre2Option() Regex& changeJpcre2Option(Uint opt, bool x) { jpcre2_compile_opts = x ? jpcre2_compile_opts | opt : jpcre2_compile_opts & ~opt; return *this; } /// Add or remove a PCRE2 option /// @param opt PCRE2 option value /// @param x Add the option if it's true, remove otherwise. /// @return Reference to the calling Regex object /// @see RegexMatch::changePcre2Option() /// @see RegexReplace::changePcre2Option() Regex& changePcre2Option(Uint opt, bool x) { compile_opts = x ? compile_opts | opt : compile_opts & ~opt; return *this; } /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options. /// This is just a wrapper of the original function Regex::changeModifier() /// provided for convenience. /// @param mod Modifier string. /// @return Reference to the calling Regex object /// @see RegexMatch::addModifier() /// @see RegexReplace::addModifier() Regex& addModifier(Modifier const& mod){ return changeModifier(mod, true); } /// Add option to existing JPCRE2 options for compile /// @param x Option value /// @return Reference to the calling Regex object /// @see RegexMatch::addJpcre2Option() /// @see RegexReplace::addJpcre2Option() Regex& addJpcre2Option(Uint x) { jpcre2_compile_opts |= x; return *this; } /// Add option to existing PCRE2 options for compile /// @param x Option value /// @return Reference to the calling Regex object /// @see RegexMatch::addPcre2Option() /// @see RegexReplace::addPcre2Option() Regex& addPcre2Option(Uint x) { compile_opts |= x; return *this; } ///Compile pattern using info from class variables. ///@see Regex::compile(String const &re, Uint po, Uint jo) ///@see Regex::compile(String const &re, Uint po) ///@see Regex::compile(String const &re, Modifier mod) ///@see Regex::compile(String const &re) void compile(void); ///@overload ///... /// Set the specified parameters, then compile the pattern using information from class variables. /// @param re Pattern string /// @param po PCRE2 option /// @param jo JPCRE2 option void compile(String const &re, Uint po, Uint jo) { setPattern(re).setPcre2Option(po).setJpcre2Option(jo); compile(); } ///@overload /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. /// @param po PCRE2 option /// @param jo JPCRE2 option void compile(String const *re, Uint po, Uint jo) { setPattern(re).setPcre2Option(po).setJpcre2Option(jo); compile(); } ///@overload /// @param re Pattern string /// @param po PCRE2 option void compile(String const &re, Uint po) { setPattern(re).setPcre2Option(po); compile(); } ///@overload /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. /// @param po PCRE2 option void compile(String const *re, Uint po) { setPattern(re).setPcre2Option(po); compile(); } /// @overload /// @param re Pattern string /// @param mod Modifier string. void compile(String const &re, Modifier const& mod) { setPattern(re).setModifier(mod); compile(); } ///@overload /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. /// @param mod Modifier string. void compile(String const *re, Modifier const& mod) { setPattern(re).setModifier(mod); compile(); } ///@overload /// @param re Pattern string . void compile(String const &re) { setPattern(re); compile(); } ///@overload /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. void compile(String const *re) { setPattern(re); compile(); } ///Returns a default constructed RegexMatch object by value. ///This object is initialized with the same modifier table ///as this Regex object. ///@return RegexMatch object. RegexMatch initMatch(){ RegexMatch rm(this); rm.setModifierTable(modtab); return rm; } ///Synonym for initMatch() ///@return RegexMatch object by value. RegexMatch getMatchObject(){ return initMatch(); } /// Perform regex match and return match count using a temporary match object. /// This temporary match object will get available options from this Regex object, /// that includes modifier table. /// @param s Subject string . /// @param mod Modifier string. /// @param start_offset Offset from where matching will start in the subject string. /// @return Match count /// @see RegexMatch::match() SIZE_T match(String const &s, Modifier const& mod, PCRE2_SIZE start_offset=0) { return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match(); } ///@overload ///... ///@param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject. ///@param mod Modifier string. ///@param start_offset Offset from where matching will start in the subject string. ///@return Match count SIZE_T match(String const *s, Modifier const& mod, PCRE2_SIZE start_offset=0) { return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match(); } ///@overload ///... /// @param s Subject string . /// @param start_offset Offset from where matching will start in the subject string. /// @return Match count /// @see RegexMatch::match() SIZE_T match(String const &s, PCRE2_SIZE start_offset=0) { return initMatch().setStartOffset(start_offset).setSubject(s).match(); } ///@overload ///... /// @param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject. /// @param start_offset Offset from where matching will start in the subject string. /// @return Match count /// @see RegexMatch::match() SIZE_T match(String const *s, PCRE2_SIZE start_offset=0) { return initMatch().setStartOffset(start_offset).setSubject(s).match(); } ///Returns a default constructed RegexReplace object by value. ///This object is initialized with the same modifier table as this Regex object. ///@return RegexReplace object. RegexReplace initReplace(){ RegexReplace rr(this); rr.setModifierTable(modtab); return rr; } ///Synonym for initReplace() ///@return RegexReplace object. RegexReplace getReplaceObject(){ return initReplace(); } /// Perform regex replace and return the replaced string using a temporary replace object. /// This temporary replace object will get available options from this Regex object, /// that includes modifier table. /// @param mains Subject string. /// @param repl String to replace with /// @param mod Modifier string. ///@param counter Pointer to a counter to store the number of replacement done. /// @return Resultant string after regex replace /// @see RegexReplace::replace() String replace(String const &mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) { return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace(); } ///@overload /// @param mains Pointer to subject string /// @param repl String to replace with /// @param mod Modifier string. ///@param counter Pointer to a counter to store the number of replacement done. /// @return Resultant string after regex replace /// @see RegexReplace::replace() String replace(String *mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) { return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace(); } ///@overload ///... /// @param mains Subject string /// @param repl Pointer to string to replace with /// @param mod Modifier string. ///@param counter Pointer to a counter to store the number of replacement done. /// @return Resultant string after regex replace /// @see RegexReplace::replace() String replace(String const &mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) { return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace(); } ///@overload ///... /// @param mains Pointer to subject string /// @param repl Pointer to string to replace with /// @param mod Modifier string. ///@param counter Pointer to a counter to store the number of replacement done. /// @return Resultant string after regex replace /// @see RegexReplace::replace() String replace(String *mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) { return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace(); } /// Perl compatible replace method. /// Modifies subject string in-place and returns replace count. /// /// It's a shorthand method to `RegexReplace::preplace()`. /// @param mains Pointer to subject string. /// @param repl Replacement string (string to replace with). /// @param mod Modifier string. /// @return replace count. SIZE_T preplace(String * mains, String const& repl, Modifier const& mod=""){ SIZE_T counter = 0; if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace(); return counter; } /// @overload /// /// Perl compatible replace method. /// Modifies subject string in-place and returns replace count. /// /// It's a shorthand method to `RegexReplace::preplace()`. /// @param mains Pointer to subject string. /// @param repl Pointer to replacement string (string to replace with). /// @param mod Modifier string. /// @return replace count. SIZE_T preplace(String * mains, String const* repl, Modifier const& mod=""){ SIZE_T counter = 0; if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace(); return counter; } /// @overload /// /// Perl compatible replace method. /// Returns replace count and discards subject string. /// /// It's a shorthand method to `RegexReplace::preplace()`. /// @param mains Subject string. /// @param repl Replacement string (string to replace with). /// @param mod Modifier string. /// @return replace count. SIZE_T preplace(String const& mains, String const& repl, Modifier const& mod=""){ SIZE_T counter = 0; initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace(); return counter; } /// @overload /// /// Perl compatible replace method. /// Returns replace count and discards subject string. /// /// It's a shorthand method to `RegexReplace::preplace()`. /// @param mains Subject string. /// @param repl Pointer to replacement string (string to replace with). /// @param mod Modifier string. /// @return replace count. SIZE_T preplace(String const& mains, String const* repl, Modifier const& mod=""){ SIZE_T counter = 0; initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace(); return counter; } }; private: //prevent object instantiation of select class select(); select(select const &); #ifdef JPCRE2_USE_MINIMUM_CXX_11 select(select&&); #endif ~select(); };//struct select }//jpcre2 namespace inline void jpcre2::ModifierTable::parseModifierTable(std::string& tabjs, VecOpt& tabjv, std::string& tab_s, VecOpt& tab_v, std::string const& tabs, VecOpt const& tabv){ SIZE_T n = tabs.length(); JPCRE2_ASSERT(n == tabv.size(), ("ValueError: Could not set Modifier table.\ Modifier character and value tables are not of the same size (" + _tostdstring(n) + " == " + _tostdstring(tabv.size()) + ").").c_str()); tabjs.clear(); tab_s.clear(); tab_s.reserve(n); tabjv.clear(); tab_v.clear(); tab_v.reserve(n); for(SIZE_T i=0;i class Map> void jpcre2::select::Regex::compile() { #else template void jpcre2::select::Regex::compile() { #endif //Get c_str of pattern Pcre2Sptr c_pattern = (Pcre2Sptr) pat_str_ptr->c_str(); int err_number = 0; PCRE2_SIZE err_offset = 0; /************************************************************************** * Compile the regular expression pattern, and handle * any errors that are detected. *************************************************************************/ //first release any previous memory freeRegexMemory(); code = Pcre2Func::compile( c_pattern, /* the pattern */ PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ compile_opts, /* default options */ &err_number, /* for error number */ &err_offset, /* for error offset */ ccontext); /* use compile context */ if (code == 0) { /* Compilation failed */ //must not free regex memory, the only function has that right is the destructor error_number = err_number; error_offset = err_offset; return; } else if ((jpcre2_compile_opts & JIT_COMPILE) != 0) { ///perform JIT compilation it it's enabled int jit_ret = Pcre2Func::jit_compile(code, PCRE2_JIT_COMPLETE); if(jit_ret < 0) error_number = jit_ret; } //everything's OK } #ifdef JPCRE2_USE_MINIMUM_CXX_11 template class Map> typename jpcre2::select::String jpcre2::select::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) { #else template typename jpcre2::select::String jpcre2::select::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) { #endif if(counter) *counter = 0; replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; replace_opts &= ~PCRE2_SUBSTITUTE_GLOBAL; Regex const * re = RegexMatch::getRegexObject(); // If re or re->code is null, return the subject string unmodified. if (!re || re->code == 0) return RegexMatch::getSubject(); Pcre2Sptr r_subject_ptr = (Pcre2Sptr) RegexMatch::getSubjectPointer()->c_str(); //~ SIZE_T totlen = RegexMatch::getSubjectPointer()->length(); if(do_match) match(); SIZE_T mcount = vec_soff.size(); // if mcount is 0, return the subject string. (there's no need to worry about re) if(!mcount) return RegexMatch::getSubject(); SIZE_T current_offset = 0; //needs to be zero, not start_offset, because it's from where unmatched parts will be copied. String res, tmp; //A check, this check is not fullproof. SIZE_T last = vec_eoff.size(); last = (last>0)?last-1:0; JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\ If you are using existing match data, try a new match."); //loop through the matches for(SIZE_T i=0;ic_str(); //substr(vec_soff[i], vec_eoff[i] - vec_soff[i]).c_str(); Pcre2Sptr subject = r_subject_ptr + vec_soff[i]; PCRE2_SIZE subject_length = vec_eoff[i] - vec_soff[i]; ///the string returned from the callback is the replacement string. Pcre2Sptr replace = (Pcre2Sptr) tmp.c_str(); PCRE2_SIZE replace_length = tmp.length(); bool retry = true; int ret = 0; PCRE2_SIZE outlengthptr = 0; Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1](); while (true) { ret = Pcre2Func::substitute( re->code, /*Points to the compiled pattern*/ subject, /*Points to the subject string*/ subject_length, /*Length of the subject string*/ 0, /*Offset in the subject at which to start matching*/ //must be zero replace_opts, /*Option bits*/ RegexMatch::mdata, /*Points to a match data block, or is NULL*/ RegexMatch::mcontext, /*Points to a match context, or is NULL*/ replace, /*Points to the replacement string*/ replace_length, /*Length of the replacement string*/ output_buffer, /*Points to the output buffer*/ &outlengthptr /*Points to the length of the output buffer*/ ); if (ret < 0) { //Handle errors if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0 && ret == (int) PCRE2_ERROR_NOMEMORY && retry) { retry = false; /// If initial #buffer_size wasn't big enough for resultant string, /// we will try once more with a new buffer size adjusted to the length of the resultant string. delete[] output_buffer; output_buffer = new Pcre2Uchar[outlengthptr + 1](); // Go and try to perform the substitute again continue; } else { RegexMatch::error_number = ret; delete[] output_buffer; return RegexMatch::getSubject(); } } //If everything's ok exit the loop break; } res += String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) ); delete[] output_buffer; if(counter) *counter += ret; //if FIND_ALL is not set, single match will be performed if((RegexMatch::getJpcre2Option() & FIND_ALL) == 0) break; } //All matched parts have been dealt with. //now copy rest of the string from current_offset res += RegexMatch::getSubject().substr(current_offset, String::npos); return res; } #ifdef JPCRE2_USE_MINIMUM_CXX_11 template class Map> typename jpcre2::select::String jpcre2::select::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){ #else template typename jpcre2::select::String jpcre2::select::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){ #endif if(counter) *counter = 0; if(do_match) match(); SIZE_T mcount = vec_soff.size(); // if mcount is 0, return the subject string. (there's no need to worry about re) if(!mcount) return RegexMatch::getSubject(); SIZE_T current_offset = 0; //no need for worrying about start offset, it's handled by match and we get valid offsets out of it. String res; //A check, this check is not fullproof SIZE_T last = vec_eoff.size(); last = (last>0)?last-1:0; JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\ If you are using existing match data, try a new match."); //loop through the matches for(SIZE_T i=0;i class Map> typename jpcre2::select::String jpcre2::select::RegexReplace::replace() { #else template typename jpcre2::select::String jpcre2::select::RegexReplace::replace() { #endif *last_replace_counter = 0; // If re or re->code is null, return the subject string unmodified. if (!re || re->code == 0) return *r_subject_ptr; Pcre2Sptr subject = (Pcre2Sptr) r_subject_ptr->c_str(); PCRE2_SIZE subject_length = r_subject_ptr->length(); Pcre2Sptr replace = (Pcre2Sptr) r_replw_ptr->c_str(); PCRE2_SIZE replace_length = r_replw_ptr->length(); PCRE2_SIZE outlengthptr = (PCRE2_SIZE) buffer_size; bool retry = true; int ret = 0; Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1](); while (true) { ret = Pcre2Func::substitute( re->code, /*Points to the compiled pattern*/ subject, /*Points to the subject string*/ subject_length, /*Length of the subject string*/ _start_offset, /*Offset in the subject at which to start matching*/ replace_opts, /*Option bits*/ mdata, /*Points to a match data block, or is NULL*/ mcontext, /*Points to a match context, or is NULL*/ replace, /*Points to the replacement string*/ replace_length, /*Length of the replacement string*/ output_buffer, /*Points to the output buffer*/ &outlengthptr /*Points to the length of the output buffer*/ ); if (ret < 0) { //Handle errors if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0 && ret == (int) PCRE2_ERROR_NOMEMORY && retry) { retry = false; /// If initial #buffer_size wasn't big enough for resultant string, /// we will try once more with a new buffer size adjusted to the length of the resultant string. delete[] output_buffer; output_buffer = new Pcre2Uchar[outlengthptr + 1](); // Go and try to perform the substitute again continue; } else { error_number = ret; delete[] output_buffer; return *r_subject_ptr; } } //If everything's ok exit the loop break; } *last_replace_counter += ret; String result = String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) ); delete[] output_buffer; return result; } #ifdef JPCRE2_USE_MINIMUM_CXX_11 template class Map> bool jpcre2::select::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector, uint32_t ovector_count) { #else template bool jpcre2::select::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector, uint32_t ovector_count) { #endif NumSub num_sub; uint32_t rcu = rc; num_sub.reserve(rcu); //we know exactly how many elements it will have. uint32_t i; for (i = 0u; i < ovector_count; i++) { if (ovector[2*i] != PCRE2_UNSET) num_sub.push_back(String((Char*)(subject + ovector[2*i]), ovector[2*i+1] - ovector[2*i])); else #ifdef JPCRE2_UNSET_CAPTURES_NULL num_sub.push_back(std::nullopt); #else num_sub.push_back(String()); #endif } vec_num->push_back(num_sub); //this function shouldn't be called if this vector is null return true; } #ifdef JPCRE2_USE_MINIMUM_CXX_11 template class Map> bool jpcre2::select::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size, Pcre2Sptr name_table, Pcre2Sptr subject, PCRE2_SIZE* ovector ) { #else template bool jpcre2::select::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size, Pcre2Sptr name_table, Pcre2Sptr subject, PCRE2_SIZE* ovector ) { #endif Pcre2Sptr tabptr = name_table; String key; MapNas map_nas; MapNtN map_ntn; for (int i = 0; i < namecount; i++) { int n; if(sizeof( Char_T ) * CHAR_BIT == 8){ n = (int)((tabptr[0] << 8) | tabptr[1]); key = toString((Char*) (tabptr + 2)); } else{ n = (int)tabptr[0]; key = toString((Char*) (tabptr + 1)); } //Use of tabptr is finished for this iteration, let's increment it now. tabptr += name_entry_size; String value((Char*)(subject + ovector[2*n]), ovector[2*n+1] - ovector[2*n]); //n, not i. if(vec_nas) map_nas[key] = value; if(vec_ntn) map_ntn[key] = n; } //push the maps into vectors: if(vec_nas) vec_nas->push_back(map_nas); if(vec_ntn) vec_ntn->push_back(map_ntn); return true; } #ifdef JPCRE2_USE_MINIMUM_CXX_11 template class Map> jpcre2::SIZE_T jpcre2::select::RegexMatch::match() { #else template jpcre2::SIZE_T jpcre2::select::RegexMatch::match() { #endif // If re or re->code is null, return 0 as the match count if (!re || re->code == 0) return 0; Pcre2Sptr subject = (Pcre2Sptr) m_subject_ptr->c_str(); Pcre2Sptr name_table = 0; int crlf_is_newline = 0; int namecount = 0; int name_entry_size = 0; int rc = 0; uint32_t ovector_count = 0; int utf = 0; SIZE_T count = 0; Uint option_bits; Uint newline = 0; PCRE2_SIZE *ovector = 0; SIZE_T subject_length = 0; MatchData *match_data = 0; subject_length = m_subject_ptr->length(); bool mdc = false; //mdata created. if (vec_num) vec_num->clear(); if (vec_nas) vec_nas->clear(); if (vec_ntn) vec_ntn->clear(); if(vec_soff) vec_soff->clear(); if(vec_eoff) vec_eoff->clear(); /* Using this function ensures that the block is exactly the right size for the number of capturing parentheses in the pattern. */ if(mdata) match_data = mdata; else { match_data = Pcre2Func::match_data_create_from_pattern(re->code, 0); mdc = true; } rc = Pcre2Func::match( re->code, /* the compiled pattern */ subject, /* the subject string */ subject_length, /* the length of the subject */ _start_offset, /* start at offset 'start_offset' in the subject */ match_opts, /* default options */ match_data, /* block for storing the result */ mcontext); /* use default match context */ /* Matching failed: handle error cases */ if (rc < 0) { if(mdc) Pcre2Func::match_data_free(match_data); /* Release memory used for the match */ //must not free code. This function has no right to modify regex switch (rc) { case PCRE2_ERROR_NOMATCH: return count; /* Handle other special cases if you like */ default:; } error_number = rc; return count; } ++count; //Increment the counter /* Match succeded. Get a pointer to the output vector, where string offsets are stored. */ ovector = Pcre2Func::get_ovector_pointer(match_data); ovector_count = Pcre2Func::get_ovector_count(match_data); /************************************************************************//* * We have found the first match within the subject string. If the output * * vector wasn't big enough, say so. Then output any substrings that were * * captured. * *************************************************************************/ /* The output vector wasn't big enough. This should not happen, because we used pcre2_match_data_create_from_pattern() above. */ if (rc == 0) { //ovector was not big enough for all the captured substrings; error_number = (int)ERROR::INSUFFICIENT_OVECTOR; rc = ovector_count; // TODO: We may throw exception at this point. } //match succeeded at offset ovector[0] if(vec_soff) vec_soff->push_back(ovector[0]); if(vec_eoff) vec_eoff->push_back(ovector[1]); // Get numbered substrings if vec_num isn't null if (vec_num) { //must do null check if(!getNumberedSubstrings(rc, subject, ovector, ovector_count)) return count; } //get named substrings if either vec_nas or vec_ntn is given. if (vec_nas || vec_ntn) { /* See if there are any named substrings, and if so, show them by name. First we have to extract the count of named parentheses from the pattern. */ (void) Pcre2Func::pattern_info( re->code, /* the compiled pattern */ PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ &namecount); /* where to put the answer */ if (namecount <= 0); /*No named substrings*/ else { /* Before we can access the substrings, we must extract the table for translating names to numbers, and the size of each entry in the table. */ (void) Pcre2Func::pattern_info( re->code, /* the compiled pattern */ PCRE2_INFO_NAMETABLE, /* address of the table */ &name_table); /* where to put the answer */ (void) Pcre2Func::pattern_info( re->code, /* the compiled pattern */ PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ &name_entry_size); /* where to put the answer */ /* Now we can scan the table and, for each entry, print the number, the name, and the substring itself. In the 8-bit library the number is held in two bytes, most significant first. */ // Get named substrings if vec_nas isn't null. // Get name to number map if vec_ntn isn't null. } //the following must be outside the above if-else if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector)) return count; } /***********************************************************************//* * If the "g" modifier was given, we want to continue * * to search for additional matches in the subject string, in a similar * * way to the /g option in Perl. This turns out to be trickier than you * * might think because of the possibility of matching an empty string. * * What happens is as follows: * * * * If the previous match was NOT for an empty string, we can just start * * the next match at the end of the previous one. * * * * If the previous match WAS for an empty string, we can't do that, as it * * would lead to an infinite loop. Instead, a call of pcre2_match() is * * made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The * * first of these tells PCRE2 that an empty string at the start of the * * subject is not a valid match; other possibilities must be tried. The * * second flag restricts PCRE2 to one match attempt at the initial string * * position. If this match succeeds, an alternative to the empty string * * match has been found, and we can print it and proceed round the loop, * * advancing by the length of whatever was found. If this match does not * * succeed, we still stay in the loop, advancing by just one character. * * In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be * * more than one byte. * * * * However, there is a complication concerned with newlines. When the * * newline convention is such that CRLF is a valid newline, we must * * advance by two characters rather than one. The newline convention can * * be set in the regex by (*CR), etc.; if not, we must find the default. * *************************************************************************/ if ((jpcre2_match_opts & FIND_ALL) == 0) { if(mdc) Pcre2Func::match_data_free(match_data); /* Release the memory that was used */ // Must not free code. This function has no right to modify regex. return count; /* Exit the program. */ } /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline sequence. First, find the options with which the regex was compiled and extract the UTF state. */ (void) Pcre2Func::pattern_info(re->code, PCRE2_INFO_ALLOPTIONS, &option_bits); utf = ((option_bits & PCRE2_UTF) != 0); /* Now find the newline convention and see whether CRLF is a valid newline sequence. */ (void) Pcre2Func::pattern_info(re->code, PCRE2_INFO_NEWLINE, &newline); crlf_is_newline = newline == PCRE2_NEWLINE_ANY || newline == PCRE2_NEWLINE_CRLF || newline == PCRE2_NEWLINE_ANYCRLF; /** We got the first match. Now loop for second and subsequent matches. */ for (;;) { Uint options = match_opts; /* Normally no options */ PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ /* If the previous match was for an empty string, we are finished if we are at the end of the subject. Otherwise, arrange to run another match at the same point to see if a non-empty match can be found. */ if (ovector[0] == ovector[1]) { if (ovector[0] == subject_length) break; options |= PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; } /// Run the next matching operation */ rc = Pcre2Func::match( re->code, /* the compiled pattern */ subject, /* the subject string */ subject_length, /* the length of the subject */ start_offset, /* starting offset in the subject */ options, /* options */ match_data, /* block for storing the result */ mcontext); /* use match context */ /* This time, a result of NOMATCH isn't an error. If the value in "options" is zero, it just means we have found all possible matches, so the loop ends. Otherwise, it means we have failed to find a non-empty-string match at a point where there was a previous empty-string match. In this case, we do what Perl does: advance the matching position by one character, and continue. We do this by setting the "end of previous match" offset, because that is picked up at the top of the loop as the point at which to start again. There are two complications: (a) When CRLF is a valid newline sequence, and the current position is just before it, advance by an extra byte. (b) Otherwise we must ensure that we skip an entire UTF character if we are in UTF mode. */ if (rc == PCRE2_ERROR_NOMATCH) { if (options == 0) break; /* All matches found */ ovector[1] = start_offset + 1; /* Advance one code unit */ if (crlf_is_newline && /* If CRLF is newline & */ start_offset < subject_length - 1 && /* we are at CRLF, */ subject[start_offset] == '\r' && subject[start_offset + 1] == '\n') ovector[1] += 1; /* Advance by one more. */ else if (utf) { /* advance a whole UTF (8 or 16), for UTF-32, it's not needed */ while (ovector[1] < subject_length) { if(sizeof( Char_T ) * CHAR_BIT == 8 && (subject[ovector[1]] & 0xc0) != 0x80) break; else if(sizeof( Char_T ) * CHAR_BIT == 16 && (subject[ovector[1]] & 0xfc00) != 0xdc00) break; else if(sizeof( Char_T ) * CHAR_BIT == 32) break; //must be else if ovector[1] += 1; } } continue; /* Go round the loop again */ } /* Other matching errors are not recoverable. */ if (rc < 0) { if(mdc) Pcre2Func::match_data_free(match_data); // Must not free code. This function has no right to modify regex. error_number = rc; return count; } /* match succeeded */ ++count; //Increment the counter if (rc == 0) { /* The match succeeded, but the output vector wasn't big enough. This should not happen. */ error_number = (int)ERROR::INSUFFICIENT_OVECTOR; rc = ovector_count; // TODO: We may throw exception at this point. } //match succeded at ovector[0] if(vec_soff) vec_soff->push_back(ovector[0]); if(vec_eoff) vec_eoff->push_back(ovector[1]); /* As before, get substrings stored in the output vector by number, and then also any named substrings. */ // Get numbered substrings if vec_num isn't null if (vec_num) { //must do null check if(!getNumberedSubstrings(rc, subject, ovector, ovector_count)) return count; } if (vec_nas || vec_ntn) { //must call this whether we have named substrings or not: if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector)) return count; } } /* End of loop to find second and subsequent matches */ if(mdc) Pcre2Func::match_data_free(match_data); // Must not free code. This function has no right to modify regex. return count; } #undef JPCRE2_VECTOR_DATA_ASSERT #undef JPCRE2_UNUSED #undef JPCRE2_USE_MINIMUM_CXX_11 //some macro documentation for doxygen #ifdef __DOXYGEN__ #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK #define JPCRE2_USE_FUNCTION_POINTER_CALLBACK #endif #ifndef JPCRE2_NDEBUG #define JPCRE2_NDEBUG #endif ///@def JPCRE2_USE_FUNCTION_POINTER_CALLBACK ///Use function pointer in all cases for MatchEvaluatorCallback function. ///By default function pointer is used for callback in MatchEvaluator when using =C++11` compiler `std::function` instead of function pointer is used. ///If this macro is defined before including jpcre2.hpp, function pointer will be used in all cases. ///It you are using lambda function with captures, stick with `std::function`, on the other hand, if ///you are using older compilers, you might want to use function pointer instead. /// ///For example, with gcc-4.7, `std::function` will give compile error in C++11 mode, in such cases where full C++11 ///support is not available, use function pointer. ///@def JPCRE2_ASSERT(cond, msg) ///Macro to call `jpcre2::jassert()` with file path and line number. ///When `NDEBUG` or `JPCRE2_NDEBUG` is defined before including this header, this macro will ///be defined as `((void)0)` thus eliminating this assertion. ///@param cond condtion (boolean) ///@param msg message ///@def JPCRE2_NDEBUG ///Macro to remove debug codes. ///Using this macro is discouraged even in production mode but provided for completeness. ///You should not use this macro to bypass any error in your program. ///Define this macro before including this header if you want to remove debug codes included in this library. /// ///Using the standard `NDEBUG` macro will have the same effect, ///but it is recommended that you use `JPCRE2_NDEBUG` to strip out debug codes specifically for this library. ///@def JPCRE2_UNSET_CAPTURES_NULL ///Define to change the type of NumSub so that captures are recorded ///with std::optional. It is undefined by default. This feature requires C++17. #endif #endif