diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..017c215cca --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +/urb/urbit.pill binary diff --git a/Makefile b/Makefile index 5b2abc7d5c..d68b97a353 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ LIB=$(PWD)/lib RM=rm -f CC=gcc -CLD=gcc -O2 -g -L/usr/local/lib -L/opt/local/lib +CLD=g++ -O2 -g -L/usr/local/lib -L/opt/local/lib YACC=bison -v -b$(GENERATED)/y LEX=lex @@ -43,24 +43,25 @@ ifeq ($(OS),osx) OSLIBS=-framework CoreServices -framework CoreFoundation endif ifeq ($(OS),linux) - OSLIBS=-lpthread -lrt -lcurses + OSLIBS=-lpthread -lrt -lcurses DEFINES=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE endif ifeq ($(OS),bsd) OSLIBS=-lpthread -lncurses -lkvm endif -LIBS=-lssl -lcrypto -lgmp -lncurses -lsigsegv $(OSLIBS) +LIBS=-lssl -lcrypto -lgmp -lncurses -lsigsegv $(OSLIBS) -lre2 INCLUDE=include GENERATED=generated MDEFINES=-DU2_OS_$(OS) -DU2_OS_ENDIAN_$(ENDIAN) -D U2_LIB=\"$(LIB)\" -CFLAGS=-O2 -g \ +CFLAGS= -O2 -g \ -I/usr/local/include \ -I/opt/local/include \ -I$(INCLUDE) \ -Ioutside/libuv/include \ + -Ioutside/cre2/include \ -I $(GENERATED) \ $(DEFINES) \ $(MDEFINES) @@ -451,6 +452,8 @@ J164_5_OFILES=\ gen164/5/mat.o \ gen164/5/mink.o \ gen164/5/parse.o \ + gen164/5/repg.o \ + gen164/5/rexp.o \ gen164/5/rub.o \ gen164/5/shax.o \ gen164/5/lore.o \ @@ -549,17 +552,21 @@ VERE_OFILES=\ $(OUT_OFILES) LIBUV=outside/libuv/libuv.a +LIBCRE=outside/cre2/lib/libcre2.a all: $(BIN)/vere $(LIBUV): $(MAKE) -C outside/libuv libuv.a +$(LIBCRE): + cd outside/cre2/src && sh build.sh + $(V_OFILES) f/loom.o f/trac.o: include/v/vere.h -$(BIN)/vere: $(VERE_OFILES) $(LIBUV) $(CAPN) +$(BIN)/vere: $(LIBCRE) $(VERE_OFILES) $(LIBUV) $(CAPN) mkdir -p $(BIN) - $(CLD) $(CLDOSFLAGS) -o $(BIN)/vere $(VERE_OFILES) $(LIBUV) $(CAPN) $(LIBS) + $(CLD) $(CLDOSFLAGS) -o $(BIN)/vere $(VERE_OFILES) $(LIBUV) $(LIBCRE) $(CAPN) $(LIBS) tags: ctags -R -f .tags --exclude=root @@ -570,4 +577,5 @@ etags: clean: $(RM) 
$(VERE_OFILES) $(BIN)/vere $(BIN)/eyre $(MAKE) -C outside/libuv clean + cd outside/cre2/src && sh clean.sh diff --git a/gen164/5/repg.c b/gen164/5/repg.c new file mode 100644 index 0000000000..6982a533bd --- /dev/null +++ b/gen164/5/repg.c @@ -0,0 +1,155 @@ +/* j/5/repg.c +** +** This file is in the public domain. +*/ +#include "all.h" +#include "../pit.h" +#include "cre2.h" +#include + + u2_noun // produce + j2_mbc(Pt5, repg)(u2_wire wir_r, + u2_noun lub, + u2_noun rad, + u2_noun rep) // retain + { + c3_y* lub_y = u2_cr_tape(lub); + c3_y* rad_y = u2_cr_tape(rad); + c3_y* rep_y = u2_cr_tape(rep); + + + + char* rec = (char*)lub_y; + char* end; + while(*rec != 0) { + if(*rec == '\\') { + rec++; + switch (*rec) { + case 'P': + case 'p': + free(lub_y); + free(rad_y); + return u2_nul; + case 'Q': + end = strstr(rec, "\\E"); + if(end == NULL) rec += strlen(rec) - 1; + else rec = end; + } + rec++; + } + else if(*rec == '(') { + rec++; + if(*rec == '?') { + rec++; + if(*rec != ':') { + free(lub_y); + free(rad_y); + return u2_nul; + } + rec++; + } + } + else + rec++; + } + + fprintf(stderr, "\r\nrepg: \r\n%s : %s\r\n", lub_y, rad_y); + + cre2_regexp_t * rex; + cre2_options_t * opt; + + opt = cre2_opt_new(); + if (opt) { + cre2_opt_set_log_errors(opt, 0); + cre2_opt_set_encoding(opt, CRE2_Latin1); + cre2_opt_set_perl_classes(opt, 1); + cre2_opt_set_one_line(opt, 1); + cre2_opt_set_longest_match(opt, 1); + rex = cre2_new((const char *)lub_y, strlen((char *)lub_y), opt); + if (rex) { + if (!cre2_error_code(rex)) { + int text_len = strlen((char *)rad_y); + cre2_string_t matches[1]; + int ic = 0; + + u2_noun ret = u2_nul; + while (ic <= text_len) { + int match = cre2_match(rex, (const char*)rad_y, text_len, ic, text_len, CRE2_ANCHOR_START, matches, 1); + + if (!match) { + if(rad_y[ic]) + ret = u2_cn_cell((u2_atom)rad_y[ic], ret); + ic++; + } + else { + int mlen = matches[0].length; + if (mlen == 0) { + ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *) rad_y+ic)), 
u2_ckb_flop(u2_ci_tape((char *)rep_y))); + ic = text_len + 1; + } + else { + ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *)rep_y)), ret); + ic += mlen; + } + } + } + cre2_opt_delete(opt); + cre2_delete(rex); + free(lub_y); + free(rad_y); + free(rep_y); + return u2_cn_cell(u2_nul, u2_ckb_flop(ret)); + } + else { + // Compiling the regular expression failed + cre2_opt_delete(opt); + cre2_delete(rex); + free(lub_y); + free(rad_y); + return u2_nul; + } + cre2_opt_delete(opt); + cre2_delete(rex); + } + else { + // rex Allocation Error + cre2_opt_delete(opt); + free(lub_y); + free(rad_y); + u2_bl_bail(wir_r, c3__exit); + } + cre2_opt_delete(opt); + } + // opt Allocation Error + free(lub_y); + free(rad_y); + u2_bl_bail(wir_r, c3__exit); + return u2_nul; + } + + u2_weak // produce + j2_mb(Pt5, repg)(u2_wire wir_r, + u2_noun cor) // retain + { + u2_noun lub; + u2_noun rad; + u2_noun rep; + + if ( (u2_none == (lub = u2_frag(u2_cv_sam_2, cor))) || + (u2_none == (rad = u2_frag(u2_cv_sam_6, cor))) || + (u2_none == (rep = u2_frag(u2_cv_sam_7, cor))) ) + { + return u2_bl_bail(wir_r, c3__fail); + } else { + return j2_mbc(Pt5, repg)(wir_r, lub, rad, rep); + } + } + + +/* structures +*/ + u2_ho_jet + j2_mbj(Pt5, repg)[] = { + { ".2", c3__lite, j2_mb(Pt5, repg), u2_jet_live | u2_jet_test, u2_none, u2_none }, + { } + }; diff --git a/gen164/5/rexp.c b/gen164/5/rexp.c new file mode 100644 index 0000000000..4dfd0e5567 --- /dev/null +++ b/gen164/5/rexp.c @@ -0,0 +1,154 @@ +/* j/5/rexp.c +** +** This file is in the public domain. 
+*/ +#include "all.h" +#include "../pit.h" +#include "cre2.h" +#include + + u2_noun // produce + j2_mbc(Pt5, rexp)(u2_wire wir_r, + u2_noun lub, + u2_noun rad) // retain + { + c3_y* lub_y = u2_cr_tape(lub); + c3_y* rad_y = u2_cr_tape(rad); + + u2k(lub); + int lub_l = u2_ckb_lent(lub); + if (lub_l != strlen((char *)lub_y)) { + free(lub_y); + free(rad_y); + return u2_nul; + } + + char* rec = (char*)lub_y; + char* end; + while(*rec != 0) { + if(*rec > 127) { + free(lub_y); + free(rad_y); + return u2_nul; + } + else if(*rec == '\\') { + rec++; + switch (*rec) { + case 'P': + case 'p': + free(lub_y); + free(rad_y); + return u2_nul; + case 'Q': + end = strstr(rec, "\\E"); + if(end == NULL) rec += strlen(rec) - 1; + else rec = end; + } + } + else if(*rec == '(') { + rec++; + if(*rec == '?') { + rec++; + if(*rec != ':') { + free(lub_y); + free(rad_y); + return u2_nul; + } + rec++; + } + } + else + rec++; + } + + fprintf(stderr, "\r\n%s : %s\r\n", lub_y, rad_y); + + cre2_regexp_t * rex; + cre2_options_t * opt; + + opt = cre2_opt_new(); + if (opt) { + cre2_opt_set_log_errors(opt, 0); + cre2_opt_set_encoding(opt, CRE2_UTF8); + cre2_opt_set_perl_classes(opt, 1); + cre2_opt_set_one_line(opt, 1); + cre2_opt_set_longest_match(opt, 1); + rex = cre2_new((const char *)lub_y, strlen((char *)lub_y), opt); + if (rex) { + if (!cre2_error_code(rex)) { + int text_len = strlen((char *)rad_y); + int captures = cre2_num_capturing_groups(rex); + cre2_string_t matches[captures+1]; + + int match = cre2_match(rex, (const char*)rad_y, text_len, 0, text_len, CRE2_UNANCHORED, matches, captures+1); + + if (!match) { + // No matches + cre2_opt_delete(opt); + cre2_delete(rex); + free(lub_y); + free(rad_y); + return u2_cn_cell(u2_nul, u2_nul); + } + + u2_noun map = u2_nul; + + int i; + for (i = 0; i < captures+1; i++) { + char * buf = malloc(matches[i].length + 1); + memcpy(buf, matches[i].data, matches[i].length); + buf[matches[i].length] = 0; + fprintf(stderr, "%d: %s\r\n", i, buf); + map = 
u2_ckd_by_put(map, i, u2_ci_tape(buf)); + free(buf); + } + + cre2_opt_delete(opt); + cre2_delete(rex); + free(lub_y); + free(rad_y); + return u2_cn_cell(u2_nul, u2_cn_cell(u2_nul, map)); + + } + else { + // Compiling the regular expression failed + cre2_opt_delete(opt); + cre2_delete(rex); + free(lub_y); + free(rad_y); + return u2_nul; + } + cre2_delete(rex); + } + cre2_opt_delete(opt); + } + free(lub_y); + free(rad_y); + u2_bl_bail(wir_r, c3__exit); + return u2_nul; + } + + u2_weak // produce + j2_mb(Pt5, rexp)(u2_wire wir_r, + u2_noun cor) // retain + { + u2_noun lub; + u2_noun rad; + + if ( (u2_none == (lub = u2_frag(u2_cv_sam_2, cor))) || + (u2_none == (rad = u2_frag(u2_cv_sam_3, cor))) ) + { + return u2_bl_bail(wir_r, c3__fail); + } else { + return j2_mbc(Pt5, rexp)(wir_r, lub, rad); + } + } + + +/* structures +*/ + u2_ho_jet + j2_mbj(Pt5, rexp)[] = { + { ".2", c3__lite, j2_mb(Pt5, rexp), u2_jet_live | u2_jet_test, u2_none, u2_none }, + { } + }; diff --git a/gen164/watt.c b/gen164/watt.c index 9328eab95f..407e514e98 100644 --- a/gen164/watt.c +++ b/gen164/watt.c @@ -87,6 +87,8 @@ extern u2_ho_jet j2_mbj(Pt5, pfix)[]; extern u2_ho_jet j2_mbj(Pt5, plug)[]; extern u2_ho_jet j2_mbj(Pt5, pose)[]; + extern u2_ho_jet j2_mbj(Pt5, repg)[]; + extern u2_ho_jet j2_mbj(Pt5, rexp)[]; extern u2_ho_jet j2_mbj(Pt5, rub)[]; extern u2_ho_jet j2_mbj(Pt5, sfix)[]; extern u2_ho_jet j2_mbj(Pt5, shax)[]; @@ -229,6 +231,8 @@ { j2_sb(Pt5, pfix), j2_mbj(Pt5, pfix), 0, 0, u2_none }, { j2_sb(Pt5, plug), j2_mbj(Pt5, plug), 0, 0, u2_none }, { j2_sb(Pt5, pose), j2_mbj(Pt5, pose), 0, 0, u2_none }, + { j2_sb(Pt5, repg), j2_mbj(Pt5, repg), 0, 0, u2_none }, + { j2_sb(Pt5, rexp), j2_mbj(Pt5, rexp), 0, 0, u2_none }, { j2_sb(Pt5, rub), j2_mbj(Pt5, rub), 0, 0, u2_none }, { j2_sb(Pt5, sfix), j2_mbj(Pt5, sfix), 0, 0, u2_none }, { j2_sb(Pt5, shax), j2_mbj(Pt5, shax), 0, 0, u2_none }, diff --git a/outside/cre2/share/doc/cre2/COPYING b/outside/cre2/share/doc/cre2/COPYING new file mode 100644 index 
0000000000..672d3b902b --- /dev/null +++ b/outside/cre2/share/doc/cre2/COPYING @@ -0,0 +1,35 @@ +Copyright (c) 2012 Marco Maggi +Copyright (c) 2011 Keegan McAllister +All rights reserved. + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + +2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +3. Neither the name of the author nor the names of his + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/outside/cre2/share/doc/cre2/LICENSE.re2 b/outside/cre2/share/doc/cre2/LICENSE.re2 new file mode 100644 index 0000000000..09e5ec1c74 --- /dev/null +++ b/outside/cre2/share/doc/cre2/LICENSE.re2 @@ -0,0 +1,27 @@ +// Copyright (c) 2009 The RE2 Authors. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/outside/cre2/share/doc/cre2/README b/outside/cre2/share/doc/cre2/README new file mode 100644 index 0000000000..d1d2543141 --- /dev/null +++ b/outside/cre2/share/doc/cre2/README @@ -0,0 +1,175 @@ + + C wrapper for re2 + ================= + + +Topics +------ + + 1. Introduction + 2. License + 3. Install + 4. Usage + A. Credits + B. Bugs + C. Resources + + +1. 
Introduction +--------------- + +The CRE2 distribution is a C language wrapper for the RE2 +library, which is implemented in C++. RE2 is a fast, safe, +thread-friendly alternative to backtracking regular +expression engines like those used in PCRE, Perl, and +Python. + + This distribution makes use of the GNU Autotools. + + +2. License +---------- + +Copyright (c) 2012, 2013 Marco Maggi +Copyright (c) 2011 Keegan McAllister +All rights reserved. + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + +2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +3. Neither the name of the author nor the names of his + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +3. 
Install +---------- + +To install RE2 follow the instructions in the README file in +RE2's distribution. To install CRE2 from a proper release tarball, do +this: + + $ cd cre2-0.1.0 + $ mkdir "=build" + $ cd "=build" + +to inspect the available configuration options: + + $ ../configure --help + +then do it: + + $ ../configure [options] + $ make + $ make check + $ make install + + From a repository checkout or snapshot (the ones from the +Github site): we may need to manually run "libtoolize" the +first time, then we must first run the script "autogen.sh" +from the top source directory, to generate the needed files: + + $ sh autogen.sh + +for this we need to have installed the GNU Autotools: +Automake, Autoconf, Libtool. After this: + + $ ../configure --enable-maintainer-mode [options] + $ make + $ make check + $ make install + + The Makefile supports the DESTDIR environment variable to +install files in a temporary location, example: to see what +will happen: + + $ make -n install DESTDIR=/tmp/marco/cre2 + +to really do it: + + $ make install DESTDIR=/tmp/marco/cre2 + + +4. Usage +-------- + +Read the documentation. + + +A. Credits +---------- + +RE2 is a Google project. CRE2 is based on code by Keegan +McAllister. This distribution was assembled by Marco Maggi. + + +B. Bugs +------- + +Bug reports are appreciated. Register issues at the CRE2 +issue tracker: + + + + +C. 
Resources +------------ + +The GNU Project software can be found here: + + + +RE2 is available at: + + + +development of this package happens at: + + + +and as backup at: + + + +proper release tarballs for this package are in the download +area at: + + + +the documentation is available online: + + + + +### end of file +# Local Variables: +# mode: text +# coding: utf-8-unix +# fill-column: 60 +# paragraph-start: "*" +# End: diff --git a/outside/cre2/share/info/cre2.info b/outside/cre2/share/info/cre2.info new file mode 100644 index 0000000000..b0ba9e3efd --- /dev/null +++ b/outside/cre2/share/info/cre2.info @@ -0,0 +1,1489 @@ +This is cre2.info, produced by makeinfo version 5.2 from cre2.texi. + +This document describes version 0.1b5 of CRE2, a C language wrapper for +the C++ library RE2: a fast, safe, thread-friendly alternative to +backtracking regular expression engines like those used in PCRE, Perl, +and Python. + + The package is distributed under the terms of a BSD-like license and +can be downloaded from: + + + +development takes place at: + + + +and as a backup at: + + + +Copyright (C) 2012 by Marco Maggi +Copyright (C) 2011 by Keegan McAllister + + Portions of this document come from the source code of RE2 itself, +see the file 'LICENSE.re2' for the license notice. + + Permission is granted to copy, distribute and/or modify this + document under the terms of the GNU Free Documentation License, + Version 1.3 or any later version published by the Free Software + Foundation; with Invariant Sections being "GNU Free Documentation + License" and "GNU General Public License", no Front-Cover Texts, + and no Back-Cover Texts. A copy of the license is included in the + section entitled "GNU Free Documentation License". +INFO-DIR-SECTION Development +START-INFO-DIR-ENTRY +* cre2: (cre2). C wrapper for RE2. 
+END-INFO-DIR-ENTRY + + +File: cre2.info, Node: Top, Next: overview, Up: (dir) + +C wrapper for RE2 +***************** + +This document describes version 0.1b5 of CRE2, a C language wrapper for +the C++ library RE2: a fast, safe, thread-friendly alternative to +backtracking regular expression engines like those used in PCRE, Perl, +and Python. + + The package is distributed under the terms of a BSD-like license and +can be downloaded from: + + + +development takes place at: + + + +and as a backup at: + + + +Copyright (C) 2012 by Marco Maggi +Copyright (C) 2011 by Keegan McAllister + + Portions of this document come from the source code of RE2 itself, +see the file 'LICENSE.re2' for the license notice. + + Permission is granted to copy, distribute and/or modify this + document under the terms of the GNU Free Documentation License, + Version 1.3 or any later version published by the Free Software + Foundation; with Invariant Sections being "GNU Free Documentation + License" and "GNU General Public License", no Front-Cover Texts, + and no Back-Cover Texts. A copy of the license is included in the + section entitled "GNU Free Documentation License". + +* Menu: + +* overview:: Overview of the package. +* version:: Version functions. +* regexps:: Precompiled regular expressions + construction. +* options:: Matching configuration. +* matching:: Matching regular expressions. +* other:: Other matching functions. +* tips:: Tips for using the regexp syntax. + +Appendices + +* Package License:: Package license. +* Documentation License:: GNU Free Documentation License. +* references:: Bibliography and references. + +Indexes + +* concept index:: An entry for each concept. +* function index:: An entry for each function. +* variable index:: An entry for each variable. +* type index:: An entry for each type. 
+ + +File: cre2.info, Node: overview, Next: version, Prev: Top, Up: Top + +1 Overview of the package +************************* + +CRE2 is a C language wrapper for the C++ library RE2: a fast, safe, +thread-friendly alternative to backtracking regular expression engines +like those used in PCRE, Perl, and Python. CRE2 is based on code by +Keegan McAllister for the 'haskell-re2' binding: + + + + For the supported regular expressions syntax we should refer to the +original documentation: + + + + The C wrapper is meant to make it easier to interface RE2 with other +languages. The exposed API allows searching for substrings of text +matching regular expressions and reporting portions of text matching +parenthetical subexpressions. + + CRE2 installs the single header file 'cre2.h'. All the function +names in the API are prefixed with 'cre2_'; all the constant names are +prefixed with 'CRE2_'; all the type names are prefixed with 'cre2_' and +suffixed with '_t'. + + When searching for the installed libraries with the GNU Autotools, we +can use the following macros in 'configure.ac': + + AC_CHECK_LIB([re2],[main],, + [AC_MSG_FAILURE([test for RE2 library failed])]) + + AC_CHECK_LIB([cre2],[cre2_version_string],, + [AC_MSG_FAILURE([test for CRE2 library failed])]) + AC_CHECK_HEADERS([cre2.h],, + [AC_MSG_ERROR([test for RE2 header failed])]) + +notice that there is no need to check for the header file 're2/re2.h'. + + It is customary for regular expression engines to provide methods to +replace backslash sequences like '\1', '\2', ... in a given string with +portions of text that matched the first, second, ... parenthetical +subexpression; CRE2 does *not* provide such methods in its public API, +because they require interacting with the storage mechanism in the +client code. However, it is not difficult to implement such +substitutions given the results of a regular expression matching +operation. 
+ + Some functions and methods from RE2 requiring memory allocation +handling are unofficially wrapped by CRE2 with unsafe code (execution +will succeed when no memory allocation errors happen). These +"problematic" functions are documented in the header file 'cre2.h' and, +at present, are not considered part of the public API of CRE2. + + It is sometimes useful to try a program in the original C++ to verify +if a problem is caused by CRE2 or is in the original RE2 code; we may +want to start by customising this program: + + /* compile and run with: + + $ g++ -Wall -o proof proof.cpp -lre2 && ./proof + */ + + #include + #include + + static void try_match (RE2::Options& opt, const char * text); + + int + main (int argc, const char *const argv[]) + { + RE2::Options opt; + opt.set_never_nl(true); + try_match(opt, "abcdef"); + return 0; + } + void + try_match (RE2::Options& opt, const char * text) + { + RE2 re("abcdef", opt); + assert(re.ok()); + assert(RE2::FullMatch(text, re)); + //assert(RE2::PartialMatch(text, re)); + } + + +File: cre2.info, Node: version, Next: regexps, Prev: overview, Up: Top + +2 Version functions +******************* + +The installed libraries follow version numbering as established by the +GNU Autotools. For an explanation of interface numbers as managed by +GNU Libtool *Note interface: (libtool)Libtool versioning. + + -- Function: const char * cre2_version_string (void) + Return a pointer to a statically allocated ASCIIZ string + representing the interface version number. + + -- Function: int cre2_version_interface_current (void) + Return an integer representing the library interface current + number. + + -- Function: int cre2_version_interface_revision (void) + Return an integer representing the library interface current + revision number. + + -- Function: int cre2_version_interface_age (void) + Return an integer representing the library interface current age. 
+ + +File: cre2.info, Node: regexps, Next: options, Prev: version, Up: Top + +3 Precompiled regular expressions construction +********************************************** + +Regular expression objects are built and finalised as follows: + + cre2_regexp_t * rex; + cre2_options_t * opt; + + opt = cre2_opt_new(); + if (opt) { + cre2_opt_set_log_errors(opt, 0); + rex = cre2_new("ciao", 4, opt); + if (rex) { + if (!cre2_error_code(rex)) + /* successfully built */ + else + /* an error occurred while compiling rex */ + cre2_delete(rex); + } else { + /* rex memory allocation error */ + } + cre2_opt_delete(opt); + } else { + /* opt memory allocation error */ + } + + -- Opaque Type: cre2_regexp_t + Opaque type for regular expression objects; it is meant to be used + to declare pointers to objects. Instances of this type can be used + for any number of matching operations and are safe for concurrent + use by multiple threads. + + -- Struct Typedef: cre2_string_t + Simple data structure used to reference a portion of another + string. It has the following fields: + + 'const char * data' + Pointer to the first byte in the referenced substring. + + 'int length' + The number of bytes in the referenced substring. + + -- Enumeration Typedef: cre2_error_code_t + Enumeration type for error codes returned by 'cre2_error_code()'. + It contains the following symbols: + + 'CRE2_NO_ERROR' + Defined as '0', represents a successful operation. + + 'CRE2_ERROR_INTERNAL' + Unexpected error. + + 'CRE2_ERROR_BAD_ESCAPE' + Bad escape sequence. + + 'CRE2_ERROR_BAD_CHAR_CLASS' + Bad character class. + + 'CRE2_ERROR_BAD_CHAR_RANGE' + Bad character class range. + + 'CRE2_ERROR_MISSING_BRACKET' + Missing closing ']'. + + 'CRE2_ERROR_MISSING_PAREN' + Missing closing ')'. + + 'CRE2_ERROR_TRAILING_BACKSLASH' + Trailing '\' at end of regexp. + + 'CRE2_ERROR_REPEAT_ARGUMENT' + Repeat argument missing, e.g. '*'. + + 'CRE2_ERROR_REPEAT_SIZE' + Bad repetition argument. 
+ + 'CRE2_ERROR_REPEAT_OP' + Bad repetition operator. + + 'CRE2_ERROR_BAD_PERL_OP' + Bad Perl operator. + + 'CRE2_ERROR_BAD_UTF8' + Invalid UTF-8 in regexp. + + 'CRE2_ERROR_BAD_NAMED_CAPTURE' + Bad named capture group. + + 'CRE2_ERROR_PATTERN_TOO_LARGE' + Pattern too large (compile failed). + + -- Function: cre2_regexp_t * cre2_new (const char * PATTERN, int + PATTERN_LEN, const cre2_options_t * OPT) + Build and return a new regular expression object representing the + PATTERN of length PATTERN_LEN bytes; the object is configured with + the options in OPT. If memory allocation fails: the return value + is a 'NULL' pointer. + + The options object OPT is duplicated in the internal state of the + regular expression instance, so OPT can be safely mutated or + finalised after this call. If OPT is 'NULL': the regular + expression object is built with the default set of options. + + -- Function: void cre2_delete (cre2_regexp_t * REX) + Finalise a regular expression object releasing all the associated + resources. + + -- Function: const char * cre2_pattern (const cre2_regexp_t * REX) + Whether REX is a successfully built regular expression object or + not: return a pointer to the pattern string. The returned pointer + is valid only while REX is alive: if 'cre2_delete()' is applied to + REX the pointer becomes invalid. + + -- Function: int cre2_num_capturing_groups (const cre2_regexp_t * REX) + If REX is a successfully built regular expression object: return a + non-negative integer representing the number of capturing groups + (parenthetical subexpressions) in the pattern. If an error + occurred while building REX: return '-1'. + + -- Function: int cre2_program_size (const cre2_regexp_t * REX) + If REX is a successfully built regular expression object: return a + non-negative integer representing the program size, a very + approximate measure of a regexp's "cost"; larger numbers are more + expensive than smaller numbers. 
If an error occurred while + building REX: return '-1'. + + -- Function: int cre2_error_code (const cre2_regexp_t * REX) + In case an error occurred while building REX: return an integer + representing the associated error code. Return zero if no error + occurred. + + -- Function: const char * cre2_error_string (const cre2_regexp_t * REX) + If an error occurred while building REX: return a pointer to an + ASCIIZ string representing the associated error message. The + returned pointer is valid only while REX is alive: if + 'cre2_delete()' is applied to REX the pointer becomes invalid. + + If REX is a successfully built regular expression object: return a + pointer to an empty string. + + The following code: + + cre2_regexp_t * rex; + + rex = cre2_new("ci(ao", 5, NULL); + { + printf("error: code=%d, msg=\"%s\"\n", + cre2_error_code(rex), + cre2_error_string(rex)); + } + cre2_delete(rex); + + prints: + + error: code=6, msg="missing ): ci(ao" + + -- Function: void cre2_error_arg (const cre2_regexp_t * REX, + cre2_string_t * ARG) + If an error occurred while building REX: fill the structure + referenced by ARG with the interval of bytes representing the + offending portion of the pattern. + + If REX is a successfully built regular expression object: ARG + references an empty string. + + The following code: + + cre2_regexp_t * rex; + cre2_string_t S; + + rex = cre2_new("ci(ao", 5, NULL); + { + cre2_error_arg(rex, &S); + printf("arg: len=%d, data=\"%s\"\n", S.length, S.data); + } + cre2_delete(rex); + + prints: + + arg: len=5 data="ci(ao" + + +File: cre2.info, Node: options, Next: matching, Prev: regexps, Up: Top + +4 Matching configuration +************************ + +Compiled regular expressions can be configured, at construction-time, +with a number of options collected in a 'cre2_options_t' object. 
Notice +that, by default, when attempting to compile an invalid regular +expression pattern, RE2 will print to 'stderr' an error message; usually +we want to avoid this logging by disabling the associated option: + + cre2_options_t * opt; + + opt = cre2_opt_new(); + cre2_opt_set_log_errors(opt, 0); + + -- Opaque Typedef: cre2_options_t + Type of opaque pointers to options objects. Any instance of this + type can be used to configure any number of regular expression + objects. + + -- Enumeration Typedef: cre2_encoding_t + Enumeration type for constants selecting encoding. It contains the + following values: + + CRE2_UNKNOWN + CRE2_UTF8 + CRE2_Latin1 + + The value 'CRE2_UNKNOWN' should never be used: it exists only in + case there is a mismatch between the definitions of RE2 and CRE2. + + -- Function: cre2_options_t * cre2_opt_new (void) + Allocate and return a new options object. If memory allocation + fails: the return value is a 'NULL' pointer. + + -- Function: void cre2_opt_delete (cre2_options_t * OPT) + Finalise an options object releasing all the associated resources. + Compiled regular expressions configured with this object are *not* + affected by its destruction. + + All the following functions are getters and setters for regular +expression options; the FLAG argument to the setter must be false to +disable the option and true to enable it; unless otherwise specified the +'int' return value is true if the option is enabled and false if it is +disabled. + + -- Function: cre2_encoding_t cre2_opt_encoding (cre2_options_t * OPT) + -- Function: void cre2_opt_set_encoding (cre2_options_t * OPT, + cre2_encoding_t ENC) + By default, the regular expression pattern and input text are + interpreted as UTF-8. CRE2_Latin1 encoding causes them to be + interpreted as Latin-1. + + The getter returns 'CRE2_UNKNOWN' if the encoding value returned by + RE2 is unknown. 
+ + -- Function: int cre2_opt_posix_syntax (cre2_options_t * OPT) + -- Function: void cre2_opt_set_posix_syntax (cre2_options_t * OPT, int + FLAG) + Restrict regexps to POSIX egrep syntax. Default is disabled. + + -- Function: int cre2_opt_longest_match (cre2_options_t * OPT) + -- Function: void cre2_opt_set_longest_match (cre2_options_t * OPT, int + FLAG) + Search for longest match, not first match. Default is disabled. + + -- Function: int cre2_opt_log_errors (cre2_options_t * OPT) + -- Function: void cre2_opt_set_log_errors (cre2_options_t * OPT, int + FLAG) + Log syntax and execution errors to 'stderr'. Default is enabled. + + -- Function: int cre2_opt_literal (cre2_options_t * OPT) + -- Function: void cre2_opt_set_literal (cre2_options_t * OPT, int FLAG) + Interpret the pattern string as literal, not as regular expression. + Default is disabled. + + Setting this option is equivalent to quoting all the special + characters defining a regular expression pattern: + + cre2_regexp_t * rex; + cre2_options_t * opt; + const char * pattern = "(ciao) (hello)"; + const char * text = pattern; + int len = strlen(pattern); + + opt = cre2_opt_new(); + cre2_opt_set_literal(opt, 1); + rex = cre2_new(pattern, len, opt); + { + /* successful match */ + cre2_match(rex, text, len, 0, len, + CRE2_UNANCHORED, NULL, 0); + } + cre2_delete(rex); + cre2_opt_delete(opt); + + -- Function: int cre2_opt_never_nl (cre2_options_t * OPT) + -- Function: void cre2_opt_set_never_nl (cre2_options_t * OPT, int + FLAG) + Never match a newline character, even if it is in the regular + expression pattern; default is disabled. Turning on this option + allows us to attempt a partial match, against the beginning of a + multiline text, without using subpatterns to exclude the newline in + the regexp pattern. + + * When set to true: matching always fails if the text or the + regexp contains a newline. + + * When set to false: matching succeeds or fails taking normal + account of newlines. 
+ + * The option does *not* cause newlines to be skipped. + + -- Function: int cre2_opt_case_sensitive (cre2_options_t * OPT) + -- Function: void cre2_opt_set_case_sensitive (cre2_options_t * OPT, + int FLAG) + Match is case-sensitive; the regular expression pattern can + override this setting with '(?i)' unless configured in POSIX syntax + mode. Default is enabled. + + -- Function: int cre2_opt_max_mem (cre2_options_t * OPT) + -- Function: void cre2_opt_set_max_mem (cre2_options_t * OPT, int M) + The max memory option controls how much memory can be used to hold + the compiled form of the regular expression and its cached DFA + graphs. These functions set and get such amount of memory. See + the documentation of RE2 for details. + + The following options are only consulted when POSIX syntax is +enabled; when POSIX syntax is disabled: these features are always +enabled and cannot be turned off. + + -- Function: int cre2_opt_perl_classes (cre2_options_t * OPT) + -- Function: void cre2_opt_set_perl_classes (cre2_options_t * OPT, int + FLAG) + Allow Perl's '\d', '\s', '\w', '\D', '\S', '\W'. Default is + disabled. + + -- Function: int cre2_opt_word_boundary (cre2_options_t * OPT) + -- Function: void cre2_opt_set_word_boundary (cre2_options_t * OPT, int + FLAG) + Allow Perl's '\b', '\B' (word boundary and not). Default is + disabled. + + -- Function: int cre2_opt_one_line (cre2_options_t * OPT) + -- Function: void cre2_opt_set_one_line (cre2_options_t * OPT, int + FLAG) + The patterns '^' and '$' only match at the beginning and end of the + text. Default is disabled. 
+ + +File: cre2.info, Node: matching, Next: other, Prev: options, Up: Top + +5 Matching regular expressions +****************************** + +Basic pattern matching goes as follows (with error checking omitted): + + cre2_regexp_t * rex; + cre2_options_t * opt; + const char * pattern = "(ciao) (hello)"; + + opt = cre2_opt_new(); + cre2_opt_set_posix_syntax(opt, 1); + + rex = cre2_new(pattern, strlen(pattern), opt); + { + const char * text = "ciao hello"; + int text_len = strlen(text); + int nmatch = 3; + cre2_string_t match[nmatch]; + + cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, + match, nmatch); + + /* prints: full match: ciao hello */ + printf("full match: "); + fwrite(match[0].data, match[0].length, 1, stdout); + printf("\n"); + + /* prints: first group: ciao */ + printf("first group: "); + fwrite(match[1].data, match[1].length, 1, stdout); + printf("\n"); + + /* prints: second group: hello */ + printf("second group: "); + fwrite(match[2].data, match[2].length, 1, stdout); + printf("\n"); + } + cre2_delete(rex); + cre2_opt_delete(opt); + + -- Enumeration Typedef: cre2_anchor_t + Enumeration type for the anchor point of matching operations. It + contains the following constants: + + CRE2_UNANCHORED + CRE2_ANCHOR_START + CRE2_ANCHOR_BOTH + + -- Function: int cre2_match (const cre2_regexp_t * REX, const char * + TEXT, int TEXT_LEN, int START_POS, int END_POS, cre2_anchor_t + ANCHOR, cre2_string_t * MATCH, int NMATCH) + Match a substring of the text referenced by TEXT and holding + TEXT_LEN bytes against the regular expression object REX. Return + true if the text matched, false otherwise. + + The zero-based indices START_POS (inclusive) and END_POS + (exclusive) select the substring of TEXT to be examined. ANCHOR + selects the anchor point for the matching operation. + + Data about the matching groups is stored in the array MATCH, which + must have at least NMATCH entries; the referenced substrings are + portions of the TEXT buffer. 
If we are only interested in + verifying if the text matches or not (ignoring the matching + portions of text): we can use 'NULL' as MATCH argument and 0 as + NMATCH argument. + + The first element of MATCH (index 0) references the full portion of + the substring of TEXT matching the pattern; the second element of + MATCH (index 1) references the portion of text matching the first + parenthetical subexpression, the third element of MATCH (index 2) + references the portion of text matching the second parenthetical + subexpression; and so on. + + -- Function: int cre2_easy_match (const char * PATTERN, int + PATTERN_LEN, const char * TEXT, int TEXT_LEN, cre2_string_t * + MATCH, int NMATCH) + Like 'cre2_match()' but the pattern is specified as string PATTERN + holding PATTERN_LEN bytes. Also the text is fully matched without + anchoring. + + If the text matches the pattern: the return value is 1. If the + text does not match the pattern: the return value is 0. If the + pattern is invalid: the return value is 2. + + -- Struct Typedef: cre2_range_t + Structure type used to represent a substring of the text to be + matched as starting and ending indices. It has the following + fields: + + 'long start' + Inclusive start byte index. + + 'long past' + Exclusive end byte index. + + -- Function: void cre2_strings_to_ranges (const char * TEXT, + cre2_range_t * RANGES, cre2_string_t * STRINGS, int NMATCH) + Given an array of STRINGS with NMATCH elements being the result of + matching TEXT against a regular expression: fill the array of + RANGES with the index intervals in the TEXT buffer representing the + same results. + + +File: cre2.info, Node: other, Next: tips, Prev: matching, Up: Top + +6 Other matching functions +************************** + +The following functions match a buffer of text against a regular +expression, allowing the extraction of portions of text matching +parenthetical subexpressions. 
All of them show the following behaviour: + + * If the text matches the pattern: the return value is 1; if the text + does not match the pattern: the return value is 0. + + * If the pattern is invalid: the return value is 0; there is no way + to distinguish this case from the case of text not matching other + than looking at what RE2 prints to 'stderr'. + + * It is impossible to turn off logging of error messages to 'stderr' + when the specification of the regular expression is invalid. + + * Data about the matching groups is stored in the array MATCH, which + must have at least NMATCH slots; the referenced substrings are + portions of the TEXT buffer. + + * The array MATCH can have a number of slots between zero (included) + and the number of parenthetical subexpressions in PATTERN + (excluded); if NMATCH is greater than the number of parenthetical + subexpressions: the return value is 0. + + * If we are only interested in verifying if the text matches the + pattern or not: we can use 'NULL' as MATCH argument and 0 as NMATCH + argument. + + * The first slot of MATCH (index 0) references the portion of text + matching the first parenthetical subexpression; the second slot of + MATCH (index 1) references the portion of text matching the second + parenthetical subexpression; and so on. + +see the documentation of each function for the differences. 
+ + The following example is a successful match: + + const char * pattern = "ci.*ut"; + const char * text = "ciao salut"; + cre2_string_t input = { + .data = text, + .length = strlen(text) + }; + int result; + result = cre2_full_match(pattern, &input, NULL, 0); + + result => 1 + +the following example is a successful match in which the parenthetical +subexpression is ignored: + + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut"; + cre2_string_t input = { + .data = text, + .length = strlen(text) + }; + int result; + result = cre2_full_match(pattern, &input, NULL, 0); + + result => 1 + +the following example is a successful match in which the portion of text +matching the parenthetical subexpression is reported: + + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut"; + cre2_string_t input = { + .data = text, + .length = strlen(text) + }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_full_match(pattern, &input, match, nmatch); + + result => 1 + strncmp(text, input.data, input.length) => 0 + strncmp("ciao", match[0].data, match[0].length) => 0 + + -- Function: int cre2_full_match (const char * PATTERN, const + cre2_string_t * TEXT, cre2_string_t * MATCH, int NMATCH) + -- Function: int cre2_full_match_re (cre2_regexp_t * REX, const + cre2_string_t * TEXT, cre2_string_t * MATCH, int NMATCH) + Match the zero-terminated string PATTERN or the precompiled regular + expression REX against the full buffer TEXT. + + For example: the text 'abcdef' matches the pattern 'abcdef' + according to this function, but neither the pattern 'abc' nor the + pattern 'def' will match. 
+ + -- Function: int cre2_partial_match (const char * PATTERN, const + cre2_string_t * TEXT, cre2_string_t * MATCH, int NMATCH) + -- Function: int cre2_partial_match_re (cre2_regexp_t * REX, const + cre2_string_t * TEXT, cre2_string_t * MATCH, int NMATCH) + Match the zero-terminated string PATTERN or the precompiled regular + expression REX against the buffer TEXT, resulting in success if a + substring of TEXT matches; these functions behave like the full + match ones, but the matching text does not need to be anchored to + the beginning and end. + + For example: the text 'abcDEFghi' matches the pattern 'DEF' + according to this function. + + -- Function: int cre2_consume (const char * PATTERN, cre2_string_t * + TEXT, cre2_string_t * MATCH, int NMATCH) + -- Function: int cre2_consume_re (cre2_regexp_t * REX, cre2_string_t * + TEXT, cre2_string_t * MATCH, int NMATCH) + Match the zero-terminated string PATTERN or the precompiled regular + expression REX against the buffer TEXT, resulting in success if the + prefix of TEXT matches. The data structure referenced by TEXT is + mutated to reference text right after the last byte that matched + the pattern. + + For example: the text 'abcDEF' matches the pattern 'abc' according + to this function; after the call TEXT will reference the text + 'DEF'. + + -- Function: int cre2_find_and_consume (const char * PATTERN, + cre2_string_t * TEXT, cre2_string_t * MATCH, int NMATCH) + -- Function: int cre2_find_and_consume_re (cre2_regexp_t * REX, + cre2_string_t * TEXT, cre2_string_t * MATCH, int NMATCH) + Match the zero-terminated string PATTERN or the precompiled regular + expression REX against the buffer TEXT, resulting in success if, + after skipping a non-matching prefix in TEXT, a substring of TEXT + matches. The data structure referenced by TEXT is mutated to + reference text right after the last byte that matched the pattern. 
+ + For example: the text 'abcDEFghi' matches the pattern 'DEF' + according to this function; the prefix 'abc' is skipped; after the + call TEXT will reference the text 'ghi'. + + +File: cre2.info, Node: tips, Next: Package License, Prev: other, Up: Top + +7 Tips for using the regexp syntax +********************************** + +* Menu: + +* tips dot:: Matching newlines with the + '.' subpattern. + + +File: cre2.info, Node: tips dot, Up: tips + +7.1 Matching newlines with the '.' subpattern +============================================= + +By default the dot subpattern '.' matches any character but newlines; to +enable newline matching we have to enable the 's' flag using the special +subpattern '(?FLAGS)' or '(?FLAGS:RE)', where 'FLAGS' is a +sequence of characters, one character for each flag, and 'RE' is a +regexp subpattern. Notice that the parentheses in '(?FLAGS:RE)' are +non-capturing. + + So let's consider the text 'ciao\nhello': + + * The regexp 'ciao.hello' does *not* match because 's' is disabled. + + * The regexp '(?s)ciao.hello' matches because the subpattern '(?s)' + has enabled flag 's' for the rest of the pattern, including the + dot. + + * The regexp 'ciao(?s).hello' matches because the subpattern '(?s)' + has enabled flag 's' for the rest of the pattern, including the + dot. + + * The regexp 'ciao(?s:.)hello' matches because the subpattern + '(?s:.)' has enabled flag 's' for the subpattern '.' which is the + dot. + + +File: cre2.info, Node: Package License, Next: Documentation License, Prev: tips, Up: Top + +Appendix A Package license +************************** + +Copyright (C) 2012 Marco Maggi +Copyright (C) 2011 Keegan McAllister +All rights reserved. + + Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2.
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +File: cre2.info, Node: Documentation License, Next: references, Prev: Package License, Up: Top + +Appendix B GNU Free Documentation License +***************************************** + + Version 1.3, 3 November 2008 + + Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. + http://fsf.org/ + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + 0. PREAMBLE + + The purpose of this License is to make a manual, textbook, or other + functional and useful document "free" in the sense of freedom: to + assure everyone the effective freedom to copy and redistribute it, + with or without modifying it, either commercially or + noncommercially.
Secondarily, this License preserves for the + author and publisher a way to get credit for their work, while not + being considered responsible for modifications made by others. + + This License is a kind of "copyleft", which means that derivative + works of the document must themselves be free in the same sense. + It complements the GNU General Public License, which is a copyleft + license designed for free software. + + We have designed this License in order to use it for manuals for + free software, because free software needs free documentation: a + free program should come with manuals providing the same freedoms + that the software does. But this License is not limited to + software manuals; it can be used for any textual work, regardless + of subject matter or whether it is published as a printed book. We + recommend this License principally for works whose purpose is + instruction or reference. + + 1. APPLICABILITY AND DEFINITIONS + + This License applies to any manual or other work, in any medium, + that contains a notice placed by the copyright holder saying it can + be distributed under the terms of this License. Such a notice + grants a world-wide, royalty-free license, unlimited in duration, + to use that work under the conditions stated herein. The + "Document", below, refers to any such manual or work. Any member + of the public is a licensee, and is addressed as "you". You accept + the license if you copy, modify or distribute the work in a way + requiring permission under copyright law. + + A "Modified Version" of the Document means any work containing the + Document or a portion of it, either copied verbatim, or with + modifications and/or translated into another language. 
+ + A "Secondary Section" is a named appendix or a front-matter section + of the Document that deals exclusively with the relationship of the + publishers or authors of the Document to the Document's overall + subject (or to related matters) and contains nothing that could + fall directly within that overall subject. (Thus, if the Document + is in part a textbook of mathematics, a Secondary Section may not + explain any mathematics.) The relationship could be a matter of + historical connection with the subject or with related matters, or + of legal, commercial, philosophical, ethical or political position + regarding them. + + The "Invariant Sections" are certain Secondary Sections whose + titles are designated, as being those of Invariant Sections, in the + notice that says that the Document is released under this License. + If a section does not fit the above definition of Secondary then it + is not allowed to be designated as Invariant. The Document may + contain zero Invariant Sections. If the Document does not identify + any Invariant Sections then there are none. + + The "Cover Texts" are certain short passages of text that are + listed, as Front-Cover Texts or Back-Cover Texts, in the notice + that says that the Document is released under this License. A + Front-Cover Text may be at most 5 words, and a Back-Cover Text may + be at most 25 words. + + A "Transparent" copy of the Document means a machine-readable copy, + represented in a format whose specification is available to the + general public, that is suitable for revising the document + straightforwardly with generic text editors or (for images composed + of pixels) generic paint programs or (for drawings) some widely + available drawing editor, and that is suitable for input to text + formatters or for automatic translation to a variety of formats + suitable for input to text formatters. 
A copy made in an otherwise + Transparent file format whose markup, or absence of markup, has + been arranged to thwart or discourage subsequent modification by + readers is not Transparent. An image format is not Transparent if + used for any substantial amount of text. A copy that is not + "Transparent" is called "Opaque". + + Examples of suitable formats for Transparent copies include plain + ASCII without markup, Texinfo input format, LaTeX input format, + SGML or XML using a publicly available DTD, and standard-conforming + simple HTML, PostScript or PDF designed for human modification. + Examples of transparent image formats include PNG, XCF and JPG. + Opaque formats include proprietary formats that can be read and + edited only by proprietary word processors, SGML or XML for which + the DTD and/or processing tools are not generally available, and + the machine-generated HTML, PostScript or PDF produced by some word + processors for output purposes only. + + The "Title Page" means, for a printed book, the title page itself, + plus such following pages as are needed to hold, legibly, the + material this License requires to appear in the title page. For + works in formats which do not have any title page as such, "Title + Page" means the text near the most prominent appearance of the + work's title, preceding the beginning of the body of the text. + + The "publisher" means any person or entity that distributes copies + of the Document to the public. + + A section "Entitled XYZ" means a named subunit of the Document + whose title either is precisely XYZ or contains XYZ in parentheses + following text that translates XYZ in another language. (Here XYZ + stands for a specific section name mentioned below, such as + "Acknowledgements", "Dedications", "Endorsements", or "History".) + To "Preserve the Title" of such a section when you modify the + Document means that it remains a section "Entitled XYZ" according + to this definition. 
+ + The Document may include Warranty Disclaimers next to the notice + which states that this License applies to the Document. These + Warranty Disclaimers are considered to be included by reference in + this License, but only as regards disclaiming warranties: any other + implication that these Warranty Disclaimers may have is void and + has no effect on the meaning of this License. + + 2. VERBATIM COPYING + + You may copy and distribute the Document in any medium, either + commercially or noncommercially, provided that this License, the + copyright notices, and the license notice saying this License + applies to the Document are reproduced in all copies, and that you + add no other conditions whatsoever to those of this License. You + may not use technical measures to obstruct or control the reading + or further copying of the copies you make or distribute. However, + you may accept compensation in exchange for copies. If you + distribute a large enough number of copies you must also follow the + conditions in section 3. + + You may also lend copies, under the same conditions stated above, + and you may publicly display copies. + + 3. COPYING IN QUANTITY + + If you publish printed copies (or copies in media that commonly + have printed covers) of the Document, numbering more than 100, and + the Document's license notice requires Cover Texts, you must + enclose the copies in covers that carry, clearly and legibly, all + these Cover Texts: Front-Cover Texts on the front cover, and + Back-Cover Texts on the back cover. Both covers must also clearly + and legibly identify you as the publisher of these copies. The + front cover must present the full title with all words of the title + equally prominent and visible. You may add other material on the + covers in addition. Copying with changes limited to the covers, as + long as they preserve the title of the Document and satisfy these + conditions, can be treated as verbatim copying in other respects. 
+ + If the required texts for either cover are too voluminous to fit + legibly, you should put the first ones listed (as many as fit + reasonably) on the actual cover, and continue the rest onto + adjacent pages. + + If you publish or distribute Opaque copies of the Document + numbering more than 100, you must either include a machine-readable + Transparent copy along with each Opaque copy, or state in or with + each Opaque copy a computer-network location from which the general + network-using public has access to download using public-standard + network protocols a complete Transparent copy of the Document, free + of added material. If you use the latter option, you must take + reasonably prudent steps, when you begin distribution of Opaque + copies in quantity, to ensure that this Transparent copy will + remain thus accessible at the stated location until at least one + year after the last time you distribute an Opaque copy (directly or + through your agents or retailers) of that edition to the public. + + It is requested, but not required, that you contact the authors of + the Document well before redistributing any large number of copies, + to give them a chance to provide you with an updated version of the + Document. + + 4. MODIFICATIONS + + You may copy and distribute a Modified Version of the Document + under the conditions of sections 2 and 3 above, provided that you + release the Modified Version under precisely this License, with the + Modified Version filling the role of the Document, thus licensing + distribution and modification of the Modified Version to whoever + possesses a copy of it. In addition, you must do these things in + the Modified Version: + + A. Use in the Title Page (and on the covers, if any) a title + distinct from that of the Document, and from those of previous + versions (which should, if there were any, be listed in the + History section of the Document). 
You may use the same title + as a previous version if the original publisher of that + version gives permission. + + B. List on the Title Page, as authors, one or more persons or + entities responsible for authorship of the modifications in + the Modified Version, together with at least five of the + principal authors of the Document (all of its principal + authors, if it has fewer than five), unless they release you + from this requirement. + + C. State on the Title page the name of the publisher of the + Modified Version, as the publisher. + + D. Preserve all the copyright notices of the Document. + + E. Add an appropriate copyright notice for your modifications + adjacent to the other copyright notices. + + F. Include, immediately after the copyright notices, a license + notice giving the public permission to use the Modified + Version under the terms of this License, in the form shown in + the Addendum below. + + G. Preserve in that license notice the full lists of Invariant + Sections and required Cover Texts given in the Document's + license notice. + + H. Include an unaltered copy of this License. + + I. Preserve the section Entitled "History", Preserve its Title, + and add to it an item stating at least the title, year, new + authors, and publisher of the Modified Version as given on the + Title Page. If there is no section Entitled "History" in the + Document, create one stating the title, year, authors, and + publisher of the Document as given on its Title Page, then add + an item describing the Modified Version as stated in the + previous sentence. + + J. Preserve the network location, if any, given in the Document + for public access to a Transparent copy of the Document, and + likewise the network locations given in the Document for + previous versions it was based on. These may be placed in the + "History" section. 
You may omit a network location for a work + that was published at least four years before the Document + itself, or if the original publisher of the version it refers + to gives permission. + + K. For any section Entitled "Acknowledgements" or "Dedications", + Preserve the Title of the section, and preserve in the section + all the substance and tone of each of the contributor + acknowledgements and/or dedications given therein. + + L. Preserve all the Invariant Sections of the Document, unaltered + in their text and in their titles. Section numbers or the + equivalent are not considered part of the section titles. + + M. Delete any section Entitled "Endorsements". Such a section + may not be included in the Modified Version. + + N. Do not retitle any existing section to be Entitled + "Endorsements" or to conflict in title with any Invariant + Section. + + O. Preserve any Warranty Disclaimers. + + If the Modified Version includes new front-matter sections or + appendices that qualify as Secondary Sections and contain no + material copied from the Document, you may at your option designate + some or all of these sections as invariant. To do this, add their + titles to the list of Invariant Sections in the Modified Version's + license notice. These titles must be distinct from any other + section titles. + + You may add a section Entitled "Endorsements", provided it contains + nothing but endorsements of your Modified Version by various + parties--for example, statements of peer review or that the text + has been approved by an organization as the authoritative + definition of a standard. + + You may add a passage of up to five words as a Front-Cover Text, + and a passage of up to 25 words as a Back-Cover Text, to the end of + the list of Cover Texts in the Modified Version. Only one passage + of Front-Cover Text and one of Back-Cover Text may be added by (or + through arrangements made by) any one entity. 
If the Document + already includes a cover text for the same cover, previously added + by you or by arrangement made by the same entity you are acting on + behalf of, you may not add another; but you may replace the old + one, on explicit permission from the previous publisher that added + the old one. + + The author(s) and publisher(s) of the Document do not by this + License give permission to use their names for publicity for or to + assert or imply endorsement of any Modified Version. + + 5. COMBINING DOCUMENTS + + You may combine the Document with other documents released under + this License, under the terms defined in section 4 above for + modified versions, provided that you include in the combination all + of the Invariant Sections of all of the original documents, + unmodified, and list them all as Invariant Sections of your + combined work in its license notice, and that you preserve all + their Warranty Disclaimers. + + The combined work need only contain one copy of this License, and + multiple identical Invariant Sections may be replaced with a single + copy. If there are multiple Invariant Sections with the same name + but different contents, make the title of each such section unique + by adding at the end of it, in parentheses, the name of the + original author or publisher of that section if known, or else a + unique number. Make the same adjustment to the section titles in + the list of Invariant Sections in the license notice of the + combined work. + + In the combination, you must combine any sections Entitled + "History" in the various original documents, forming one section + Entitled "History"; likewise combine any sections Entitled + "Acknowledgements", and any sections Entitled "Dedications". You + must delete all sections Entitled "Endorsements." + + 6. 
COLLECTIONS OF DOCUMENTS + + You may make a collection consisting of the Document and other + documents released under this License, and replace the individual + copies of this License in the various documents with a single copy + that is included in the collection, provided that you follow the + rules of this License for verbatim copying of each of the documents + in all other respects. + + You may extract a single document from such a collection, and + distribute it individually under this License, provided you insert + a copy of this License into the extracted document, and follow this + License in all other respects regarding verbatim copying of that + document. + + 7. AGGREGATION WITH INDEPENDENT WORKS + + A compilation of the Document or its derivatives with other + separate and independent documents or works, in or on a volume of a + storage or distribution medium, is called an "aggregate" if the + copyright resulting from the compilation is not used to limit the + legal rights of the compilation's users beyond what the individual + works permit. When the Document is included in an aggregate, this + License does not apply to the other works in the aggregate which + are not themselves derivative works of the Document. + + If the Cover Text requirement of section 3 is applicable to these + copies of the Document, then if the Document is less than one half + of the entire aggregate, the Document's Cover Texts may be placed + on covers that bracket the Document within the aggregate, or the + electronic equivalent of covers if the Document is in electronic + form. Otherwise they must appear on printed covers that bracket + the whole aggregate. + + 8. TRANSLATION + + Translation is considered a kind of modification, so you may + distribute translations of the Document under the terms of section + 4. 
Replacing Invariant Sections with translations requires special + permission from their copyright holders, but you may include + translations of some or all Invariant Sections in addition to the + original versions of these Invariant Sections. You may include a + translation of this License, and all the license notices in the + Document, and any Warranty Disclaimers, provided that you also + include the original English version of this License and the + original versions of those notices and disclaimers. In case of a + disagreement between the translation and the original version of + this License or a notice or disclaimer, the original version will + prevail. + + If a section in the Document is Entitled "Acknowledgements", + "Dedications", or "History", the requirement (section 4) to + Preserve its Title (section 1) will typically require changing the + actual title. + + 9. TERMINATION + + You may not copy, modify, sublicense, or distribute the Document + except as expressly provided under this License. Any attempt + otherwise to copy, modify, sublicense, or distribute it is void, + and will automatically terminate your rights under this License. + + However, if you cease all violation of this License, then your + license from a particular copyright holder is reinstated (a) + provisionally, unless and until the copyright holder explicitly and + finally terminates your license, and (b) permanently, if the + copyright holder fails to notify you of the violation by some + reasonable means prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is + reinstated permanently if the copyright holder notifies you of the + violation by some reasonable means, this is the first time you have + received notice of violation of this License (for any work) from + that copyright holder, and you cure the violation prior to 30 days + after your receipt of the notice. 
+ + Termination of your rights under this section does not terminate + the licenses of parties who have received copies or rights from you + under this License. If your rights have been terminated and not + permanently reinstated, receipt of a copy of some or all of the + same material does not give you any rights to use it. + + 10. FUTURE REVISIONS OF THIS LICENSE + + The Free Software Foundation may publish new, revised versions of + the GNU Free Documentation License from time to time. Such new + versions will be similar in spirit to the present version, but may + differ in detail to address new problems or concerns. See + <http://www.gnu.org/copyleft/>. + + Each version of the License is given a distinguishing version + number. If the Document specifies that a particular numbered + version of this License "or any later version" applies to it, you + have the option of following the terms and conditions either of + that specified version or of any later version that has been + published (not as a draft) by the Free Software Foundation. If the + Document does not specify a version number of this License, you may + choose any version ever published (not as a draft) by the Free + Software Foundation. If the Document specifies that a proxy can + decide which future versions of this License can be used, that + proxy's public statement of acceptance of a version permanently + authorizes you to choose that version for the Document. + + 11. RELICENSING + + "Massive Multiauthor Collaboration Site" (or "MMC Site") means any + World Wide Web server that publishes copyrightable works and also + provides prominent facilities for anybody to edit those works. A + public wiki that anybody can edit is an example of such a server. + A "Massive Multiauthor Collaboration" (or "MMC") contained in the + site means any set of copyrightable works thus published on the MMC + site.
+ + "CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 + license published by Creative Commons Corporation, a not-for-profit + corporation with a principal place of business in San Francisco, + California, as well as future copyleft versions of that license + published by that same organization. + + "Incorporate" means to publish or republish a Document, in whole or + in part, as part of another Document. + + An MMC is "eligible for relicensing" if it is licensed under this + License, and if all works that were first published under this + License somewhere other than this MMC, and subsequently + incorporated in whole or in part into the MMC, (1) had no cover + texts or invariant sections, and (2) were thus incorporated prior + to November 1, 2008. + + The operator of an MMC Site may republish an MMC contained in the + site under CC-BY-SA on the same site at any time before August 1, + 2009, provided the MMC is eligible for relicensing. + +ADDENDUM: How to use this License for your documents +==================================================== + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and license +notices just after the title page: + + Copyright (C) YEAR YOUR NAME. + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. + + If you have Invariant Sections, Front-Cover Texts and Back-Cover +Texts, replace the "with...Texts." line with this: + + with the Invariant Sections being LIST THEIR TITLES, with + the Front-Cover Texts being LIST, and with the Back-Cover Texts + being LIST. 
+ + If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + + If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of free +software license, such as the GNU General Public License, to permit +their use in free software. + + +File: cre2.info, Node: references, Next: concept index, Prev: Documentation License, Up: Top + +Appendix C Bibliography and references +************************************** + + +File: cre2.info, Node: concept index, Next: function index, Prev: references, Up: Top + +Appendix D An entry for each concept +************************************ + +[index] +* Menu: + +* 'CRE2_ANCHOR_BOTH': matching. (line 44) +* 'CRE2_ANCHOR_START': matching. (line 44) +* 'CRE2_ERROR_BAD_CHAR_CLASS': regexps. (line 59) +* 'CRE2_ERROR_BAD_CHAR_RANGE': regexps. (line 62) +* 'CRE2_ERROR_BAD_ESCAPE': regexps. (line 56) +* 'CRE2_ERROR_BAD_NAMED_CAPTURE': regexps. (line 89) +* 'CRE2_ERROR_BAD_PERL_OP': regexps. (line 83) +* 'CRE2_ERROR_BAD_UTF8': regexps. (line 86) +* 'CRE2_ERROR_INTERNAL': regexps. (line 53) +* 'CRE2_ERROR_MISSING_BRACKET': regexps. (line 65) +* 'CRE2_ERROR_MISSING_PAREN': regexps. (line 68) +* 'CRE2_ERROR_PATTERN_TOO_LARGE': regexps. (line 92) +* 'CRE2_ERROR_REPEAT_ARGUMENT': regexps. (line 74) +* 'CRE2_ERROR_REPEAT_SIZE': regexps. (line 77) +* 'CRE2_ERROR_REPEAT_OP': regexps. (line 80) +* 'CRE2_ERROR_TRAILING_BACKSLASH': regexps. (line 71) +* 'CRE2_Latin1': options. (line 23) +* 'CRE2_NO_ERROR': regexps. (line 50) +* 'CRE2_UNANCHORED': matching. (line 44) +* 'CRE2_UNKNOWN': options. (line 23) +* 'CRE2_UTF8': options. (line 23) +* FDL, GNU Free Documentation License: Documentation License. + (line 6) + + +File: cre2.info, Node: function index, Next: variable index, Prev: concept index, Up: Top + +Appendix E An entry for each function.
+************************************** + +[index] +* Menu: + +* cre2_consume: other. (line 109) +* cre2_consume_re: other. (line 111) +* cre2_delete: regexps. (line 106) +* cre2_easy_match: matching. (line 76) +* cre2_error_arg: regexps. (line 159) +* cre2_error_code: regexps. (line 129) +* cre2_error_string: regexps. (line 134) +* cre2_find_and_consume: other. (line 123) +* cre2_find_and_consume_re: other. (line 125) +* cre2_full_match: other. (line 85) +* cre2_full_match_re: other. (line 87) +* cre2_match: matching. (line 51) +* cre2_new: regexps. (line 94) +* cre2_num_capturing_groups: regexps. (line 116) +* cre2_opt_case_sensitive: options. (line 115) +* cre2_opt_delete: options. (line 37) +* cre2_opt_encoding: options. (line 48) +* cre2_opt_literal: options. (line 73) +* cre2_opt_log_errors: options. (line 68) +* cre2_opt_longest_match: options. (line 63) +* cre2_opt_max_mem: options. (line 122) +* cre2_opt_never_nl: options. (line 98) +* cre2_opt_new: options. (line 33) +* cre2_opt_one_line: options. (line 145) +* cre2_opt_perl_classes: options. (line 133) +* cre2_opt_posix_syntax: options. (line 58) +* cre2_opt_set_case_sensitive: options. (line 116) +* cre2_opt_set_encoding: options. (line 49) +* cre2_opt_set_literal: options. (line 74) +* cre2_opt_set_log_errors: options. (line 69) +* cre2_opt_set_longest_match: options. (line 64) +* cre2_opt_set_max_mem: options. (line 123) +* cre2_opt_set_never_nl: options. (line 99) +* cre2_opt_set_one_line: options. (line 146) +* cre2_opt_set_perl_classes: options. (line 134) +* cre2_opt_set_posix_syntax: options. (line 59) +* cre2_opt_set_word_boundary: options. (line 140) +* cre2_opt_word_boundary: options. (line 139) +* cre2_partial_match: other. (line 96) +* cre2_partial_match_re: other. (line 98) +* cre2_pattern: regexps. (line 110) +* cre2_program_size: regexps. (line 122) +* cre2_strings_to_ranges: matching. (line 98) +* cre2_version_interface_age: version. (line 22) +* cre2_version_interface_current: version. 
(line 14) +* cre2_version_interface_revision: version. (line 18) +* cre2_version_string: version. (line 10) + + +File: cre2.info, Node: variable index, Next: type index, Prev: function index, Up: Top + +Appendix F An entry for each variable. +************************************** + + +File: cre2.info, Node: type index, Prev: variable index, Up: Top + +Appendix G An entry for each type. +********************************** + +[index] +* Menu: + +* cre2_anchor_t: matching. (line 43) +* cre2_encoding_t: options. (line 22) +* cre2_error_code_t: regexps. (line 45) +* cre2_options_t: options. (line 17) +* cre2_range_t: matching. (line 87) +* cre2_regexp_t: regexps. (line 29) +* cre2_string_t: regexps. (line 35) + + + +Tag Table: +Node: Top1464 +Node: overview3738 +Node: version6906 +Node: regexps7796 +Node: options13843 +Node: matching19960 +Node: other23916 +Node: tips29670 +Node: tips dot29938 +Node: Package License31005 +Node: Documentation License32762 +Node: references57906 +Node: concept index58085 +Node: function index59945 +Node: variable index63574 +Node: type index63747 + +End Tag Table diff --git a/outside/cre2/share/info/dir b/outside/cre2/share/info/dir new file mode 100644 index 0000000000..8a98a5f75f --- /dev/null +++ b/outside/cre2/share/info/dir @@ -0,0 +1,18 @@ +This is the file .../info/dir, which contains the +topmost node of the Info hierarchy, called (dir)Top. +The first time you invoke Info you start off looking at this node. + +File: dir, Node: Top This is the top of the INFO tree + + This (the Directory node) gives a menu of major topics. + Typing "q" exits, "?" lists all Info commands, "d" returns here, + "h" gives a primer for first-timers, + "mEmacs" visits the Emacs manual, etc. + + In Emacs, you can click mouse button 2 on a menu item or cross reference + to select it. + +* Menu: + +Development +* cre2: (cre2). C wrapper for RE2. 
diff --git a/outside/cre2/src/.gitignore b/outside/cre2/src/.gitignore new file mode 100644 index 0000000000..6b70ebd7ad --- /dev/null +++ b/outside/cre2/src/.gitignore @@ -0,0 +1,44 @@ +*~ +=* +,,* +*.a +*.bz2 +*.fasl +*.gz +*.html +*.info +*.o +*.out +*.so +*.so.* +*.tgz +*.tmp +.DS_Store +.arch +.deps/ +.emacs.* +.gdb_history +.vimview +Makefile +Makefile.in +aclocal.m4 +ar-lib +autom4te* +compile +config.guess +config.h.in +config.sub +config.cache +configure +depcomp +test-driver +doc/mdate-sh +doc/stamp-vti +doc/texinfo.tex +doc/version.texi +install-sh +missing +mkinstalldirs +ltmain.sh +m4/ +autotools/ diff --git a/outside/cre2/src/COPYING b/outside/cre2/src/COPYING new file mode 100644 index 0000000000..672d3b902b --- /dev/null +++ b/outside/cre2/src/COPYING @@ -0,0 +1,35 @@ +Copyright (c) 2012 Marco Maggi +Copyright (c) 2011 Keegan McAllister +All rights reserved. + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + +2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +3. Neither the name of the author nor the names of his + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/outside/cre2/src/INSTALL b/outside/cre2/src/INSTALL new file mode 100644 index 0000000000..7d1c323bea --- /dev/null +++ b/outside/cre2/src/INSTALL @@ -0,0 +1,365 @@ +Installation Instructions +************************* + +Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, +2006, 2007, 2008, 2009 Free Software Foundation, Inc. + + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. + +Basic Installation +================== + + Briefly, the shell commands `./configure; make; make install' should +configure, build, and install this package. The following +more-detailed instructions are generic; see the `README' file for +instructions specific to this package. Some packages provide this +`INSTALL' file but do not implement all of the features documented +below. The lack of an optional feature in a given package is not +necessarily a bug. More recommendations for GNU packages can be found +in *note Makefile Conventions: (standards)Makefile Conventions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. 
Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. Caching is +disabled by default to prevent problems with accidental use of stale +cache files. + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You need `configure.ac' if +you want to change it or regenerate `configure' using a newer version +of `autoconf'. + + The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. + + Running `configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package, generally using the just-built uninstalled binaries. + + 4. Type `make install' to install the programs and any data files and + documentation. When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the `make install' phase executed with root + privileges. + + 5. 
Optionally, type `make installcheck' to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. Running this target as a + regular user, particularly if the prior `make install' required + root privileges, verifies that the installation completed + correctly. + + 6. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + + 7. Often, you can also type `make uninstall' to remove the installed + files again. In practice, not all packages have tested that + uninstallation works correctly, even though it is required by the + GNU Coding Standards. + + 8. Some packages, particularly those that use Automake, provide `make + distcheck', which can be used by developers to test that all other + targets like `make install' and `make uninstall' work correctly. + This target is generally not run by end users. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. Run `./configure --help' +for details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details.
+ +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. This +is known as a "VPATH" build. + + With a non-GNU `make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use `make distclean' before +reconfiguring for another architecture. + + On MacOS X 10.5 and later systems, you can create libraries and +executables that work on multiple system types--known as "fat" or +"universal" binaries--by specifying multiple `-arch' options to the +compiler but only a single `-arch' option to the preprocessor. Like +this: + + ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CPP="gcc -E" CXXCPP="g++ -E" + + This is not guaranteed to produce working output in all cases, you +may have to build one architecture at a time and combine the results +using the `lipo' tool if you have problems. + +Installation Names +================== + + By default, `make install' installs the package's commands under +`/usr/local/bin', include files under `/usr/local/include', etc. You +can specify an installation prefix other than `/usr/local' by giving +`configure' the option `--prefix=PREFIX', where PREFIX must be an +absolute file name. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. 
If you +pass the option `--exec-prefix=PREFIX' to `configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=DIR' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. In general, the +default for these options is expressed in terms of `${prefix}', so that +specifying just `--prefix' will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to `configure'; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +`make install' command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. For example, `make install +prefix=/alternate/directory' will choose an alternate location for all +directory configuration variables that were expressed in terms of +`${prefix}'. Any directories that were specified during `configure', +but not in terms of `${prefix}', must each be overridden at install +time for the entire installation to be relocated. The approach of +makefile variable overrides for each directory variable is required by +the GNU Coding Standards, and ideally causes no recompilation. +However, some platforms have known limitations with the semantics of +shared libraries that end up requiring recompilation when using this +method, particularly noticeable in packages that use GNU Libtool. + + The second method involves providing the `DESTDIR' variable. For +example, `make install DESTDIR=/alternate/directory' will prepend +`/alternate/directory' before all installation names. 
The approach of +`DESTDIR' overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of `${prefix}' +at `configure' time. + +Optional Features +================= + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + + Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + + Some packages offer the ability to configure how verbose the +execution of `make' will be. For these packages, running `./configure +--enable-silent-rules' sets the default to minimal output, which can be +overridden with `make V=1'; while running `./configure +--disable-silent-rules' sets the default to verbose, which can be +overridden with `make V=0'. + +Particular systems +================== + + On HP-UX, the default C compiler is not ANSI C compatible. If GNU +CC is not installed, it is recommended to use the following options in +order to use an ANSI C compiler: + + ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" + +and if that doesn't work, install pre-built binaries of GCC for HP-UX. + + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot +parse its `<wchar.h>' header file.
The option `-nodtk' can be used as +a workaround. If GNU CC is not installed, it is therefore recommended +to try + + ./configure CC="cc" + +and if that doesn't work, try + + ./configure CC="cc -nodtk" + + On Solaris, don't put `/usr/ucb' early in your `PATH'. This +directory contains several dysfunctional programs; working variants of +these programs are available in `/usr/bin'. So, if you need `/usr/ucb' +in your `PATH', put it _after_ `/usr/bin'. + + On Haiku, software installed for all users goes in `/boot/common', +not `/usr/local'. It is recommended to use the following options: + + ./configure --prefix=/boot/common + +Specifying the System Type +========================== + + There may be some features `configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, `configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the +`--build=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS + KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option `--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. 
+ +Sharing Defaults +================ + + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified `gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for `CONFIG_SHELL' due to +an Autoconf bug. Until the bug is fixed you can use this workaround: + + CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash + +`configure' Invocation +====================== + + `configure' recognizes the following options to control how it +operates. + +`--help' +`-h' + Print a summary of all of the options to `configure', and exit. + +`--help=short' +`--help=recursive' + Print a summary of the options unique to this package's + `configure', and exit. The `short' variant lists options used + only in the top level, while the `recursive' variant lists options + also present in any nested packages. + +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. 
FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: + for more details, including other options available for fine-tuning + the installation locations. + +`--no-create' +`-n' + Run the configure checks, but stop before creating any output + files. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. + diff --git a/outside/cre2/src/LICENSE.re2 b/outside/cre2/src/LICENSE.re2 new file mode 100644 index 0000000000..09e5ec1c74 --- /dev/null +++ b/outside/cre2/src/LICENSE.re2 @@ -0,0 +1,27 @@ +// Copyright (c) 2009 The RE2 Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/outside/cre2/src/Makefile.am b/outside/cre2/src/Makefile.am new file mode 100644 index 0000000000..4eabb2ef90 --- /dev/null +++ b/outside/cre2/src/Makefile.am @@ -0,0 +1,48 @@ +## Process this file with automake to produce Makefile.in + +ACLOCAL_AMFLAGS = -I autotools +EXTRA_DIST = INSTALL configure.sh prepare.sh +dist_doc_DATA = README COPYING LICENSE.re2 + +## -------------------------------------------------------------------- + +cre2_CURRENT = @cre2_VERSION_INTERFACE_CURRENT@ +cre2_REVISION = @cre2_VERSION_INTERFACE_REVISION@ +cre2_AGE = @cre2_VERSION_INTERFACE_AGE@ + +include_HEADERS = src/cre2.h + +lib_LTLIBRARIES = libcre2.la +libcre2_la_LDFLAGS = -version-info $(cre2_CURRENT):$(cre2_REVISION):$(cre2_AGE) +libcre2_la_SOURCES = src/cre2.cpp + +## -------------------------------------------------------------------- + +AM_MAKEINFOFLAGS = --no-split + +info_TEXINFOS = doc/cre2.texi +doc_cre2_TEXINFOS = doc/fdl-1.3.texi + +## -------------------------------------------------------------------- + +check_PROGRAMS = \ + tests/test-version \ + tests/test-options \ + tests/test-rex-alloc \ + tests/test-matching \ + tests/test-easy-matching \ + tests/test-full-match \ 
+ tests/test-partial-match \ + tests/test-consume-match \ + tests/test-find-and-consume-match \ + tests/test-replace \ + tests/test-misc + +AM_CPPFLAGS = -I$(top_srcdir)/src +LDADD = libcre2.la +TESTS = $(check_PROGRAMS) + +installcheck-local: $(check_PROGRAMS) + for f in $(check_PROGRAMS); do $$f; done + +### end of file diff --git a/outside/cre2/src/README b/outside/cre2/src/README new file mode 100644 index 0000000000..d1d2543141 --- /dev/null +++ b/outside/cre2/src/README @@ -0,0 +1,175 @@ + + C wrapper for re2 + ================= + + +Topics +------ + + 1. Introduction + 2. License + 3. Install + 4. Usage + A. Credits + B. Bugs + C. Resources + + +1. Introduction +--------------- + +The CRE2 distribution is a C language wrapper for the RE2 +library, which is implemented in C++. RE2 is a fast, safe, +thread-friendly alternative to backtracking regular +expression engines like those used in PCRE, Perl, and +Python. + + This distribution makes use of the GNU Autotools. + + +2. License +---------- + +Copyright (c) 2012, 2013 Marco Maggi +Copyright (c) 2011 Keegan McAllister +All rights reserved. + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + +2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +3. Neither the name of the author nor the names of his + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +3. Install +---------- + +To install RE2 follow the instructions in the README file in +RE2's. To install CRE2 from a proper release tarball, do +this: + + $ cd cre2-0.1.0 + $ mkdir "=build" + $ cd "=build" + +to inspect the available configuration options: + + $ ../configure --help + +then do it: + + $ ../configure [options] + $ make + $ make check + $ make install + + From a repository checkout or snapshot (the ones from the +Github site): we may need to manually run "libtoolize" the +first time, then we must first run the script "autogen.sh" +from the top source directory, to generate the needed files: + + $ sh autogen.sh + +for this we need to have installed the GNU Autotools: +Automake, Autoconf, Libtool. After this: + + $ ../configure --enable-maintainer-mode [options] + $ make + $ make check + $ make install + + The Makefile supports the DESTDIR environment variable to +install files in a temporary location, example: to see what +will happen: + + $ make -n install DESTDIR=/tmp/marco/cre2 + +to really do it: + + $ make install DESTDIR=/tmp/marco/cre2 + + +4. Usage +-------- + +Read the documentation. + + +A. Credits +---------- + +RE2 is a Google project. CRE2 is based on code by Keegan +McAllister. 
This distribution was assembled by Marco Maggi. + + +B. Bugs +------- + +Bug reports are appreciated. Register issues at the CRE2 +issue tracker: + + <http://github.com/marcomaggi/cre2/issues> + + +C. Resources +------------ + +The GNU Project software can be found here: + + <http://www.gnu.org/> + +RE2 is available at: + + <http://code.google.com/p/re2/> + +development of this package happens at: + + <http://github.com/marcomaggi/cre2> + +and as backup at: + + <http://sourceforge.net/projects/cre2> + +proper release tarballs for this package are in the download +area at: + + <http://sourceforge.net/projects/cre2/files> + +the documentation is available online: + + <http://marcomaggi.github.com/docs/cre2.html> + + +### end of file +# Local Variables: +# mode: text +# coding: utf-8-unix +# fill-column: 60 +# paragraph-start: "*" +# End: diff --git a/outside/cre2/src/autogen.sh b/outside/cre2/src/autogen.sh new file mode 100644 index 0000000000..284a55d60e --- /dev/null +++ b/outside/cre2/src/autogen.sh @@ -0,0 +1,11 @@ +# autogen.sh -- +# +# Run this in the top source directory to rebuild the infrastructure. + +set -xe +test -d autotools || mkdir autotools +test -f autotools/libtool.m4 || libtoolize +autoreconf --warnings=all --install --verbose "$@" + +### end of file + diff --git a/outside/cre2/src/build.sh b/outside/cre2/src/build.sh new file mode 100644 index 0000000000..fdf2e0110c --- /dev/null +++ b/outside/cre2/src/build.sh @@ -0,0 +1,17 @@ +if [ ! -d "=build" ]; then +mkdir "=build" +(libtoolize || glibtoolize) +sh autogen.sh +cd "=build" +../configure --enable-maintainer-mode LDFLAGS=-pthread +make +cd .. +fi +if [ ! -d "../lib" ]; then +mkdir ../lib +fi +if [ !
-d "../include" ]; then +mkdir ../include +fi +cp \=build/.libs/* ../lib +cp src/cre2.h ../include diff --git a/outside/cre2/src/clean.sh b/outside/cre2/src/clean.sh new file mode 100644 index 0000000000..207e3d30d4 --- /dev/null +++ b/outside/cre2/src/clean.sh @@ -0,0 +1,9 @@ +if [ -d "=build" ]; then + rm -r "=build" +fi +if [ -d "../lib" ]; then +rm -r ../lib +fi +if [ -d "../include" ]; then +rm -r ../include +fi diff --git a/outside/cre2/src/configure.ac b/outside/cre2/src/configure.ac new file mode 100644 index 0000000000..4accbb6265 --- /dev/null +++ b/outside/cre2/src/configure.ac @@ -0,0 +1,58 @@ +dnl @configure_input@ +dnl + +AC_PREREQ([2.68]) +AC_INIT([CRE2],[0.1b5],[marco.maggi-ipsu@poste.it], + [cre2],[http://github.com/marcomaggi/cre2/]) +AC_CONFIG_SRCDIR([src/]) +AC_CONFIG_MACRO_DIR([autotools]) +AC_CONFIG_AUX_DIR([autotools]) +AC_CANONICAL_BUILD +AC_CANONICAL_HOST +AC_CANONICAL_TARGET +AM_INIT_AUTOMAKE([1.14 foreign subdir-objects dist-xz no-dist-gzip -Wall]) +AM_MAINTAINER_MODE + +AM_PROG_AR +AC_PROG_INSTALL +AC_PROG_LN_S +AC_PROG_MAKE_SET +AC_PROG_MKDIR_P + +LT_PREREQ([2.4]) +LT_INIT + +AC_PROG_CC +AC_PROG_CC_C_O + +AC_LANG([C++]) +AC_PROG_CXX +AC_PROG_CXX_C_O + +AC_CHECK_LIB([re2],[main],,[AC_MSG_FAILURE([test for RE2 library failed])]) +AC_CHECK_HEADERS([re2/re2.h],,[AC_MSG_ERROR([test for RE2 header failed])]) + +cre2_VERSION_INTERFACE_CURRENT=0 +cre2_VERSION_INTERFACE_REVISION=0 +cre2_VERSION_INTERFACE_AGE=0 +AC_DEFINE_UNQUOTED([cre2_VERSION_INTERFACE_CURRENT], + [$cre2_VERSION_INTERFACE_CURRENT], + [current interface number]) +AC_DEFINE_UNQUOTED([cre2_VERSION_INTERFACE_REVISION], + [$cre2_VERSION_INTERFACE_REVISION], + [current interface implementation number]) +AC_DEFINE_UNQUOTED([cre2_VERSION_INTERFACE_AGE], + [$cre2_VERSION_INTERFACE_AGE], + [current interface age number]) +AC_DEFINE_UNQUOTED([cre2_VERSION_INTERFACE_STRING], + ["$cre2_VERSION_INTERFACE_CURRENT.$cre2_VERSION_INTERFACE_REVISION"], + [library interface version]) 
+AC_SUBST([cre2_VERSION_INTERFACE_CURRENT]) +AC_SUBST([cre2_VERSION_INTERFACE_REVISION]) +AC_SUBST([cre2_VERSION_INTERFACE_AGE]) + +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_FILES([Makefile]) +AC_OUTPUT + +dnl end of file diff --git a/outside/cre2/src/configure.sh b/outside/cre2/src/configure.sh new file mode 100644 index 0000000000..c7b9368097 --- /dev/null +++ b/outside/cre2/src/configure.sh @@ -0,0 +1,24 @@ +# configure.sh -- +# +# Run this to configure. + +set -xe + +prefix=/usr/local +if test -d /lib64 +then libdir=${prefix}/lib64 +else libdir=${prefix}/lib +fi + +../configure \ + --config-cache \ + --cache-file=../config.cache \ + --enable-maintainer-mode \ + --disable-static --enable-shared \ + --prefix="${prefix}" \ + --libdir="${libdir}" \ + CFLAGS='-O3' \ + LDFLAGS="-L${libdir}" \ + "$@" + +### end of file diff --git a/outside/cre2/src/doc/cre2.texi b/outside/cre2/src/doc/cre2.texi new file mode 100644 index 0000000000..86c858dc8d --- /dev/null +++ b/outside/cre2/src/doc/cre2.texi @@ -0,0 +1,1474 @@ +\input texinfo.tex +@c %**start of header +@setfilename cre2.info +@settitle C wrapper for RE2 +@c %**end of header + +@include version.texi + +@c page +@c ------------------------------------------------------------ +@c License macros. +@c ------------------------------------------------------------ + +@macro gnu{} +@acronym{GNU} +@end macro + +@macro gpl{} +@acronym{GPL} +@end macro + +@macro fdl{} +@acronym{FDL} +@end macro + +@macro bsd{} +@acronym{BSD} +@end macro + +@c ------------------------------------------------------------ +@c Software related macros. +@c ------------------------------------------------------------ + +@macro bash{} +@command{bash} +@end macro + +@macro gmp{} +@acronym{GMP} +@end macro + +@macro gcc{} +@acronym{GCC} +@end macro + +@macro glibc{} +@gnu{} C Library +@end macro + +@c ------------------------------------------------------------ +@c Network protocols acronyms. 
+@c ------------------------------------------------------------ + +@macro dns{} +@acronym{DNS} +@end macro + +@macro ftp{} +@acronym{FTP} +@end macro + +@macro dhcp{} +@acronym{DHCP} +@end macro + +@macro icmp{} +@acronym{ICMP} +@end macro + +@macro http{} +@acronym{HTTP} +@end macro + +@macro https{} +@acronym{HTTPS} +@end macro + +@macro imap{} +@acronym{IMAP} +@end macro + +@macro ip{} +@acronym{IP} +@end macro + +@macro tcp{} +@acronym{TCP} +@end macro + +@macro udp{} +@acronym{UDP} +@end macro + +@macro ppp{} +@acronym{PPP} +@end macro + +@macro sftp{} +@acronym{SFTP} +@end macro + +@macro smtp{} +@acronym{SMTP} +@end macro + +@macro snmp{} +@acronym{SNMP} +@end macro + +@macro ssh{} +@acronym{SSH} +@end macro + +@macro ssl{} +@acronym{SSL} +@end macro + +@macro tls{} +@acronym{TLS} +@end macro + +@c ------------------------------------------------------------ +@c Miscellaneous acronyms. +@c ------------------------------------------------------------ + +@macro adsl{} +@acronym{ADSL} +@end macro + +@macro alsa{} +@acronym{ALSA} +@end macro + +@macro ansi{} +@acronym{ANSI} +@end macro + +@macro api{} +@acronym{API} +@end macro + +@macro ascii{} +@acronym{ASCII} +@end macro + +@macro asciiz{} +@acronym{ASCIIZ} +@end macro + +@macro cdrom{} +@acronym{CDROM} +@end macro + +@macro cli{} +@acronym{CLI} +@end macro + +@macro cpu{} +@acronym{CPU} +@end macro + +@macro gui{} +@acronym{GUI} +@end macro + +@macro ieee{} +@acronym{IEEE} +@end macro + +@macro isp{} +@acronym{ISP} +@end macro + +@macro iso{} +@acronym{ISO} +@end macro + +@macro mime{} +@acronym{MIME} +@end macro + +@macro mpeg{} +@acronym{MPEG} +@end macro + +@macro posix{} +@acronym{POSIX} +@end macro + +@macro ram{} +@acronym{RAM} +@end macro + +@macro rfc{} +@acronym{RFC} +@end macro + +@c Remember that @url is already used by Texinfo.
+@macro urla{} +@acronym{URL} +@end macro + +@macro usb{} +@acronym{USB} +@end macro + +@macro utf{} +@acronym{UTF} +@end macro + +@macro uri{} +@acronym{URI} +@end macro + +@macro xmla{} +@acronym{XML} +@end macro + + +@c ------------------------------------------------------------ +@c Arguments macros. +@c ------------------------------------------------------------ + +@macro vari{ARG} +@var{\ARG\1} +@end macro + +@macro varii{ARG} +@var{\ARG\2} +@end macro + +@macro variii{ARG} +@var{\ARG\3} +@end macro + +@macro variv{ARG} +@var{\ARG\4} +@end macro + +@macro varn{ARG} +@var{\ARG\n} +@end macro + +@macro vark{ARG} +@var{\ARG\k} +@end macro + +@macro varj{ARG} +@var{\ARG\j} +@end macro + +@c ------------------------------------------------------------ + +@macro meta{ARG} +<\ARG\> +@end macro + +@macro metai{ARG} +@meta{\ARG\1} +@end macro + +@macro metaii{ARG} +@meta{\ARG\2} +@end macro + +@macro metaiii{ARG} +@meta{\ARG\3} +@end macro + +@macro metaiv{ARG} +@meta{\ARG\4} +@end macro + +@macro metan{ARG} +@meta{\ARG\n} +@end macro + +@macro metak{ARG} +@meta{\ARG\k} +@end macro + +@macro metaj{ARG} +@meta{\ARG\j} +@end macro + +@c ------------------------------------------------------------ +@c C language macros. +@c ------------------------------------------------------------ + +@macro cfunc{NAME} +@code{\NAME\()} +@end macro + +@macro cnull{} +@code{NULL} +@end macro + +@c ------------------------------------------------------------ +@c Scheme language macros. +@c ------------------------------------------------------------ + +@macro clos{} +@acronym{CLOS} +@end macro + +@macro library{NAME} +@code{(\NAME\)} +@end macro + +@macro repl{} +@acronym{REPL} +@end macro + +@macro rnrs{VERSION} +@acronym{R\VERSION\RS} +@end macro + +@macro srfi{} +@acronym{SRFI} +@end macro + +@ignore +Separating the @srfi{} macro from the number with a '--' rather than a +'-' makes the expansion look ugly in menu entries under the Info reader. 
+IMHO this should not happen, but it does; so we live with this, because +the main purpose of this document is to provide an Info version. +@end ignore +@macro ansrfi{NUM} +@srfi{}-\NUM\ +@end macro + +@c ------------------------------------------------------------ + +@macro func{NAME} +@code{@sc{\NAME\}} +@end macro + +@macro nil{} +@code{()} +@end macro + +@macro true{} +@code{#t} +@end macro + +@macro false{} +@code{#f} +@end macro + +@macro keyword{NAME} +@code{#:\NAME\} +@end macro + +@macro class{NAME} +@code{<\NAME\>} +@end macro + +@c ------------------------------------------------------------ +@c TCL macros. +@c ------------------------------------------------------------ + +@ifinfo +@macro tclcmd{NAME} +[\NAME\] +@end macro +@end ifinfo + +@ifnotinfo +@macro tclcmd{NAME} +@code{[\NAME\]} +@end macro +@end ifnotinfo + +@macro tclvar{NAME} +@code{\NAME\} +@end macro + +@macro tclcode{CODE} +@code{[\CODE\]} +@end macro + +@c ------------------------------------------------------------ +@c Macros for references to external documents. +@c ------------------------------------------------------------ + +@macro glibcref{NODE, TITLE} +@ref{\NODE\,\TITLE\,\TITLE\,libc} +@end macro + +@macro rsixref{NODE, TITLE} +@ref{\NODE\,\TITLE\,\TITLE\,r6rs} +@end macro + +@macro rfiveref{NODE, TITLE} +@ref{\NODE\,\TITLE\,\TITLE\,r5rs} +@end macro + +@macro ikarusref{NODE, TITLE} +@ref{\NODE\,\TITLE\,\TITLE\,ikarus} +@end macro + +@macro bibref{TAG} +@code{[\TAG\]} +@end macro + +@c page +@c ------------------------------------------------------------ +@c Values. +@c ------------------------------------------------------------ + +@set TITLE C wrapper for RE2 + +@c To be used as @value{PACKAGE} whenever we need to include the full +@c name of this package. +@set PACKAGE CRE2 + +@c To be used as @value{PACKAGE} whenever we need to include the +@c nickname of the project: the name that is used to compose the +@c distribution tarball or the web address. 
+@set PACKAGE_NICKNAME cre2 + +@c To be used as @value{AUTHOR} whenever we need to include the list of +@c authors of this document. +@set AUTHOR Marco Maggi + +@c To be used as @value{AUTHOR_EMAIL} whenever we need to include the +@c email of the *single* author of this document. +@set AUTHOR_EMAIL @email{marco.maggi-ipsu@@poste.it} + +@set AUTHOR_URL @url{http://github.com/marcomaggi} + +@c To be used as @value{COPYRIGHT_YEARS} whenever we need to include the +@c list of copyright years. +@set COPYRIGHT_YEARS 2012 + +@c page +@c ------------------------------------------------------------ +@c Copyright notice. +@c ------------------------------------------------------------ + +@copying +This document describes version @value{VERSION} of @value{PACKAGE}, a C +language wrapper for the C++ library RE2: a fast, safe, thread--friendly +alternative to backtracking regular expression engines like those used +in PCRE, Perl, and Python. + +The package is distributed under the terms of a @acronym{BSD}--like +license and can be downloaded from: + +@center @url{http://sourceforge.net/projects/cre2/files/} + +@noindent +development takes place at: + +@center @url{http://github.com/marcomaggi/@value{PACKAGE_NICKNAME}} + +@noindent +and as a backup at: + +@center @url{http://sourceforge.net/projects/@value{PACKAGE_NICKNAME}} + +@noindent +Copyright @copyright{} @value{COPYRIGHT_YEARS} by @value{AUTHOR} @value{AUTHOR_URL}@* +Copyright @copyright{} 2011 by Keegan McAllister @url{http://github.com/kmcallister/} + +Portions of this document come from the source code of RE2 itself, see +the file @file{LICENSE.re2} for the license notice. 
+ +@quotation +Permission is granted to copy, distribute and/or modify this document +under the terms of the @gnu{} Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with +Invariant Sections being ``@gnu{} Free Documentation License'' and +``@gnu{} General Public License'', no Front--Cover Texts, and no +Back--Cover Texts. A copy of the license is included in the section +entitled ``@gnu{} Free Documentation License''. +@end quotation +@end copying + +@c page +@c ------------------------------------------------------------ +@c Headers. +@c ------------------------------------------------------------ + +@titlepage +@title @value{TITLE} +@subtitle Revision @value{VERSION} +@author @value{AUTHOR} @value{AUTHOR_EMAIL} +@page +@vskip 0pt plus 1filll +@insertcopying +@end titlepage + +@c ------------------------------------------------------------ + +@ifinfo +@dircategory Development +@direntry +* cre2: (cre2). @value{TITLE}. +@end direntry +@end ifinfo + +@c page +@ifnottex +@node Top +@top @value{TITLE} + +@insertcopying + +@menu +* overview:: Overview of the package. +* version:: Version functions. +* regexps:: Precompiled regular expressions + construction. +* options:: Matching configuration. +* matching:: Matching regular expressions. +* other:: Other matching functions. +* tips:: Tips for using the regexp syntax. + +Appendices + +* Package License:: Package license. +* Documentation License:: GNU Free Documentation License. +* references:: Bibliography and references. + +Indexes + +* concept index:: An entry for each concept. +* function index:: An entry for each function. +* variable index:: An entry for each variable. +* type index:: An entry for each type. 
+@end menu +@end ifnottex + +@c page +@node overview +@chapter Overview of the package + + +@value{PACKAGE} is a C language wrapper for the C++ library RE2: a fast, +safe, thread--friendly alternative to backtracking regular expression +engines like those used in PCRE, Perl, and Python. @value{PACKAGE} is +based on code by Keegan McAllister for the @code{haskell-re2} binding: + +@center @url{http://github.com/kmcallister/haskell-re2} + +For the supported regular expressions syntax we should refer to the +original documentation: + +@center @url{http://code.google.com/p/re2/wiki/Syntax} + +The C wrapper is meant to make it easier to interface RE2 with other +languages. The exposed @api{} allows searching for substrings of text +matching regular expressions and reporting portions of text matching +parenthetical subexpressions. + +@value{PACKAGE} installs the single header file @file{cre2.h}. All the +function names in the @api{} are prefixed with @code{cre2_}; all the +constant names are prefixed with @code{CRE2_}; all the type names are +prefixed with @code{cre2_} and suffixed with @code{_t}. + +When searching for the installed libraries with the @gnu{} Autotools, we +can use the following macros in @file{configure.ac}: + +@example +AC_CHECK_LIB([re2],[main],, + [AC_MSG_FAILURE([test for RE2 library failed])]) + +AC_CHECK_LIB([cre2],[cre2_version_string],, + [AC_MSG_FAILURE([test for CRE2 library failed])]) +AC_CHECK_HEADERS([cre2.h],, + [AC_MSG_ERROR([test for CRE2 header failed])]) +@end example + +@noindent +notice that there is no need to check for the header file +@file{re2/re2.h}.
+ +It is customary for regular expression engines to provide methods to +replace backslash sequences like @code{\1}, @code{\2}, @dots{} in a +given string with portions of text that matched the first, second, +@dots{} parenthetical subexpression; @value{PACKAGE} does @strong{not} +provide such methods in its public @api{}, because they require +interacting with the storage mechanism in the client code. However, it +is not difficult to implement such substitutions given the results of a +regular expression matching operation. + +Some functions and methods from RE2 requiring memory allocation handling +are unofficially wrapped by @value{PACKAGE} with unsafe code (execution +will succeed when no memory allocation errors happen). These +``problematic'' functions are documented in the header file +@file{cre2.h} and, at present, are not considered part of the public +@api{} of @value{PACKAGE}. + +It is sometimes useful to try a program in the original C++ to verify if +a problem is caused by @value{PACKAGE} or is in the original RE2 code; +we may want to start by customising this program: + +@example +/* compile and run with: + + $ g++ -Wall -o proof proof.cpp -lre2 && ./proof +*/ + +#include <re2/re2.h> +#include <assert.h> + +static void try_match (RE2::Options& opt, const char * text); + +int +main (int argc, const char *const argv[]) +@{ + RE2::Options opt; + opt.set_never_nl(true); + try_match(opt, "abcdef"); + return 0; +@} +void +try_match (RE2::Options& opt, const char * text) +@{ + RE2 re("abcdef", opt); + assert(re.ok()); + assert(RE2::FullMatch(text, re)); + //assert(RE2::PartialMatch(text, re)); +@} +@end example + +@c page +@node version +@chapter Version functions + + +The installed libraries follow version numbering as established by the +@gnu{} Autotools. For an explanation of interface numbers as managed by +@gnu{} Libtool @xref{Libtool versioning, interface, Libtool's versioning +system, libtool, Shared library support for @gnu{}}.
+ + +@deftypefun {const char *} cre2_version_string (void) +Return a pointer to a statically allocated @asciiz{} string representing +the interface version number. +@end deftypefun + + +@deftypefun int cre2_version_interface_current (void) +Return an integer representing the library interface current number. +@end deftypefun + + +@deftypefun int cre2_version_interface_revision (void) +Return an integer representing the library interface current revision +number. +@end deftypefun + + +@deftypefun int cre2_version_interface_age (void) +Return an integer representing the library interface current age. +@end deftypefun + +@c page +@node regexps +@chapter Precompiled regular expressions construction + + +Regular expression objects are built and finalised as follows: + +@example +cre2_regexp_t * rex; +cre2_options_t * opt; + +opt = cre2_opt_new(); +if (opt) @{ + cre2_opt_set_log_errors(opt, 0); + rex = cre2_new("ciao", 4, opt); + if (rex) @{ + if (!cre2_error_code(rex)) + /* successfully built */ + else + /* an error occurred while compiling rex */ + cre2_delete(rex); + @} else @{ + /* rex memory allocation error */ + @} + cre2_opt_delete(opt); +@} else @{ + /* opt memory allocation error */ +@} +@end example + + +@deftp {Opaque Type} cre2_regexp_t +Opaque type for regular expression objects; it is meant to be used to +declare pointers to objects. Instances of this type can be used for any +number of matching operations and are safe for concurrent use by +multiple threads. +@end deftp + + +@deftp {Struct Typedef} cre2_string_t +Simple data structure used to reference a portion of another string. It +has the following fields: + +@table @code +@item const char * data +Pointer to the first byte in the referenced substring. + +@item int length +The number of bytes in the referenced substring. +@end table +@end deftp + + +@deftp {Enumeration Typedef} cre2_error_code_t +Enumeration type for error codes returned by @cfunc{cre2_error_code}. 
+It contains the following symbols: + +@table @code +@item CRE2_NO_ERROR +@cindex @code{CRE2_NO_ERROR} +Defined as @code{0}, represents a successful operation. + +@item CRE2_ERROR_INTERNAL +@cindex @code{CRE2_ERROR_INTERNAL} +Unexpected error. + +@item CRE2_ERROR_BAD_ESCAPE +@cindex @code{CRE2_ERROR_BAD_ESCAPE} +Bad escape sequence. + +@item CRE2_ERROR_BAD_CHAR_CLASS +@cindex @code{CRE2_ERROR_BAD_CHAR_CLASS} +Bad character class. + +@item CRE2_ERROR_BAD_CHAR_RANGE +@cindex @code{CRE2_ERROR_BAD_CHAR_RANGE} +Bad character class range. + +@item CRE2_ERROR_MISSING_BRACKET +@cindex @code{CRE2_ERROR_MISSING_BRACKET} +Missing closing @code{]}. + +@item CRE2_ERROR_MISSING_PAREN +@cindex @code{CRE2_ERROR_MISSING_PAREN} +Missing closing @code{)}. + +@item CRE2_ERROR_TRAILING_BACKSLASH +@cindex @code{CRE2_ERROR_TRAILING_BACKSLASH} +Trailing @code{\} at end of regexp. + +@item CRE2_ERROR_REPEAT_ARGUMENT +@cindex @code{CRE2_ERROR_REPEAT_ARGUMENT} +Repeat argument missing, e.g. @code{*}. + +@item CRE2_ERROR_REPEAT_SIZE +@cindex @code{CRE2_ERROR_REPEAT_SIZE} +Bad repetition argument. + +@item CRE2_ERROR_REPEAT_OP +@cindex @code{CRE2_ERROR_REPEAT_OP} +Bad repetition operator. + +@item CRE2_ERROR_BAD_PERL_OP +@cindex @code{CRE2_ERROR_BAD_PERL_OP} +Bad Perl operator. + +@item CRE2_ERROR_BAD_UTF8 +@cindex @code{CRE2_ERROR_BAD_UTF8} +Invalid @utf{}-8 in regexp. + +@item CRE2_ERROR_BAD_NAMED_CAPTURE +@cindex @code{CRE2_ERROR_BAD_NAMED_CAPTURE} +Bad named capture group. + +@item CRE2_ERROR_PATTERN_TOO_LARGE +@cindex @code{CRE2_ERROR_PATTERN_TOO_LARGE} +Pattern too large (compile failed). +@end table +@end deftp + + +@deftypefun {cre2_regexp_t *} cre2_new (const char * @var{pattern}, int @var{pattern_len}, const cre2_options_t * @var{opt}) +Build and return a new regular expression object representing the +@var{pattern} of length @var{pattern_len} bytes; the object is +configured with the options in @var{opt}. If memory allocation fails: +the return value is a @cnull{} pointer.
+ +The options object @var{opt} is duplicated in the internal state of the +regular expression instance, so @var{opt} can be safely mutated or +finalised after this call. If @var{opt} is @cnull{}: the regular +expression object is built with the default set of options. +@end deftypefun + + +@deftypefun void cre2_delete (cre2_regexp_t * @var{rex}) +Finalise a regular expression object releasing all the associated +resources. +@end deftypefun + + +@deftypefun {const char *} cre2_pattern (const cre2_regexp_t * @var{rex}) +Whether @var{rex} is a successfully built regular expression object or +not: return a pointer to the pattern string. The returned pointer is +valid only while @var{rex} is alive: if @cfunc{cre2_delete} is applied +to @var{rex} the pointer becomes invalid. +@end deftypefun + + +@deftypefun int cre2_num_capturing_groups (const cre2_regexp_t * @var{rex}) +If @var{rex} is a successfully built regular expression object: return a +non--negative integer representing the number of capturing groups +(parenthetical subexpressions) in the pattern. If an error occurred +while building @var{rex}: return @code{-1}. +@end deftypefun + + +@deftypefun int cre2_program_size (const cre2_regexp_t * @var{rex}) +If @var{rex} is a successfully built regular expression object: return a +non--negative integer representing the program size, a very approximate +measure of a regexp's ``cost''; larger numbers are more expensive than +smaller numbers. If an error occurred while building @var{rex}: return +@code{-1}. +@end deftypefun + + +@deftypefun int cre2_error_code (const cre2_regexp_t * @var{rex}) +In case an error occurred while building @var{rex}: return an integer +representing the associated error code. Return zero if no error +occurred. +@end deftypefun + + +@deftypefun {const char *} cre2_error_string (const cre2_regexp_t * @var{rex}) +If an error occurred while building @var{rex}: return a pointer to an +@asciiz{} string representing the associated error message. 
The +returned pointer is valid only while @var{rex} is alive: if +@cfunc{cre2_delete} is applied to @var{rex} the pointer becomes invalid. + +If @var{rex} is a successfully built regular expression object: return a +pointer to an empty string. + +The following code: + +@example +cre2_regexp_t * rex; + +rex = cre2_new("ci(ao", 5, NULL); +@{ + printf("error: code=%d, msg=\"%s\"\n", + cre2_error_code(rex), + cre2_error_string(rex)); +@} +cre2_delete(rex); +@end example + +@noindent +prints: + +@example +error: code=6, msg="missing ): ci(ao" +@end example +@end deftypefun + + +@deftypefun void cre2_error_arg (const cre2_regexp_t * @var{rex}, cre2_string_t * @var{arg}) +If an error occurred while building @var{rex}: fill the structure +referenced by @var{arg} with the interval of bytes representing the +offending portion of the pattern. + +If @var{rex} is a successfully built regular expression object: +@var{arg} references an empty string. + +The following code: + +@example +cre2_regexp_t * rex; +cre2_string_t S; + +rex = cre2_new("ci(ao", 5, NULL); +@{ + cre2_error_arg(rex, &S); + printf("arg: len=%d, data=\"%s\"\n", S.length, S.data); +@} +cre2_delete(rex); +@end example + +@noindent +prints: + +@example +arg: len=5, data="ci(ao" +@end example +@end deftypefun + +@c page +@node options +@chapter Matching configuration + + +Compiled regular expressions can be configured, at construction--time, +with a number of options collected in a @code{cre2_options_t} object. +Notice that, by default, when attempting to compile an invalid regular +expression pattern, RE2 will print to @code{stderr} an error message; +usually we want to avoid this logging by disabling the associated +option: + +@example +cre2_options_t * opt; + +opt = cre2_opt_new(); +cre2_opt_set_log_errors(opt, 0); +@end example + + +@deftp {Opaque Typedef} cre2_options_t +Type of opaque pointers to options objects. Any instance of this type +can be used to configure any number of regular expression objects.
+@end deftp + + +@deftp {Enumeration Typedef} cre2_encoding_t +@cindex @code{CRE2_UNKNOWN} +@cindex @code{CRE2_UTF8} +@cindex @code{CRE2_Latin1} +Enumeration type for constants selecting encoding. It contains the +following values: + +@example +CRE2_UNKNOWN +CRE2_UTF8 +CRE2_Latin1 +@end example + +The value @code{CRE2_UNKNOWN} should never be used: it exists only in +case there is a mismatch between the definitions of RE2 and +@value{PACKAGE}. +@end deftp + + +@deftypefun {cre2_options_t *} cre2_opt_new (void) +Allocate and return a new options object. If memory allocation fails: +the return value is a @cnull{} pointer. +@end deftypefun + + +@deftypefun void cre2_opt_delete (cre2_options_t * @var{opt}) +Finalise an options object releasing all the associated resources. +Compiled regular expressions configured with this object are +@strong{not} affected by its destruction. +@end deftypefun + + +All the following functions are getters and setters for regular +expression options; the @var{flag} argument to the setter must be false +to disable the option and true to enable it; unless otherwise specified +the @code{int} return value is true if the option is enabled and false +if it is disabled. + + +@deftypefun cre2_encoding_t cre2_opt_encoding (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_encoding (cre2_options_t * @var{opt}, cre2_encoding_t @var{enc}) +By default, the regular expression pattern and input text are +interpreted as @utf{}-8. CRE2_Latin1 encoding causes them to be +interpreted as Latin-1. + +The getter returns @code{CRE2_UNKNOWN} if the encoding value returned by +RE2 is unknown. +@end deftypefun + + +@deftypefun int cre2_opt_posix_syntax (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_posix_syntax (cre2_options_t * @var{opt}, int @var{flag}) +Restrict regexps to @posix{} egrep syntax. Default is disabled. 
+@end deftypefun + + +@deftypefun int cre2_opt_longest_match (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_longest_match (cre2_options_t * @var{opt}, int @var{flag}) +Search for longest match, not first match. Default is disabled. +@end deftypefun + + +@deftypefun int cre2_opt_log_errors (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_log_errors (cre2_options_t * @var{opt}, int @var{flag}) +Log syntax and execution errors to @code{stderr}. Default is enabled. +@end deftypefun + + +@deftypefun int cre2_opt_literal (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_literal (cre2_options_t * @var{opt}, int @var{flag}) +Interpret the pattern string as literal, not as regular expression. +Default is disabled. + +Setting this option is equivalent to quoting all the special characters +defining a regular expression pattern: + +@example +cre2_regexp_t * rex; +cre2_options_t * opt; +const char * pattern = "(ciao) (hello)"; +const char * text = pattern; +int len = strlen(pattern); + +opt = cre2_opt_new(); +cre2_opt_set_literal(opt, 1); +rex = cre2_new(pattern, len, opt); +@{ + /* successful match */ + cre2_match(rex, text, len, 0, len, + CRE2_UNANCHORED, NULL, 0); +@} +cre2_delete(rex); +cre2_opt_delete(opt); +@end example +@end deftypefun + + +@deftypefun int cre2_opt_never_nl (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_never_nl (cre2_options_t * @var{opt}, int @var{flag}) +Never match a newline character, even if it is in the regular expression +pattern; default is disabled. Turning on this option allows us to +attempt a partial match, against the beginning of a multiline text, +without using subpatterns to exclude the newline in the regexp pattern. + +@itemize +@item +When set to true: matching always fails if the text or the regexp +contains a newline. + +@item +When set to false: matching succeeds or fails taking normal account of +newlines. + +@item +The option does @strong{not} cause newlines to be skipped. 
+@end itemize +@end deftypefun + + +@deftypefun int cre2_opt_case_sensitive (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_case_sensitive (cre2_options_t * @var{opt}, int @var{flag}) +Match is case--sensitive; the regular expression pattern can override +this setting with @code{(?i)} unless configured in @posix{} syntax +mode. Default is enabled. +@end deftypefun + + +@deftypefun int cre2_opt_max_mem (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_max_mem (cre2_options_t * @var{opt}, int @var{m}) +The max memory option controls how much memory can be used to hold the +compiled form of the regular expression and its cached @acronym{DFA} +graphs. These functions set and get such amount of memory. See the +documentation of RE2 for details. +@end deftypefun + + +The following options are only consulted when @posix{} syntax is +enabled; when @posix{} syntax is disabled: these features are always +enabled and cannot be turned off. + + +@deftypefun int cre2_opt_perl_classes (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_perl_classes (cre2_options_t * @var{opt}, int @var{flag}) +Allow Perl's @code{\d}, @code{\s}, @code{\w}, @code{\D}, @code{\S}, +@code{\W}. Default is disabled. +@end deftypefun + + +@deftypefun int cre2_opt_word_boundary (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_word_boundary (cre2_options_t * @var{opt}, int @var{flag}) +Allow Perl's @code{\b}, @code{\B} (word boundary and not). Default is +disabled. +@end deftypefun + + +@deftypefun int cre2_opt_one_line (cre2_options_t * @var{opt}) +@deftypefunx void cre2_opt_set_one_line (cre2_options_t * @var{opt}, int @var{flag}) +The patterns @code{^} and @code{$} only match at the beginning and end +of the text. Default is disabled. 
+@end deftypefun + +@c page +@node matching +@chapter Matching regular expressions + + +Basic pattern matching goes as follows (with error checking omitted): + +@example +cre2_regexp_t * rex; +cre2_options_t * opt; +const char * pattern = "(ciao) (hello)"; + +opt = cre2_opt_new(); +cre2_opt_set_posix_syntax(opt, 1); + +rex = cre2_new(pattern, strlen(pattern), opt); +@{ + const char * text = "ciao hello"; + int text_len = strlen(text); + int nmatch = 3; + cre2_string_t match[nmatch]; + + cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, + match, nmatch); + + /* prints: full match: ciao hello */ + printf("full match: "); + fwrite(match[0].data, match[0].length, 1, stdout); + printf("\n"); + + /* prints: first group: ciao */ + printf("first group: "); + fwrite(match[1].data, match[1].length, 1, stdout); + printf("\n"); + + /* prints: second group: hello */ + printf("second group: "); + fwrite(match[2].data, match[2].length, 1, stdout); + printf("\n"); +@} +cre2_delete(rex); +cre2_opt_delete(opt); +@end example + + +@deftp {Enumeration Typedef} cre2_anchor_t +@cindex @code{CRE2_UNANCHORED} +@cindex @code{CRE2_ANCHOR_START} +@cindex @code{CRE2_ANCHOR_BOTH} +Enumeration type for the anchor point of matching operations. It +contains the following constants: + +@example +CRE2_UNANCHORED +CRE2_ANCHOR_START +CRE2_ANCHOR_BOTH +@end example +@end deftp + + +@deftypefun int cre2_match (const cre2_regexp_t * @var{rex}, const char * @var{text}, int @var{text_len}, int @var{start_pos}, int @var{end_pos}, cre2_anchor_t @var{anchor}, cre2_string_t * @var{match}, int @var{nmatch}) +Match a substring of the text referenced by @var{text} and holding +@var{text_len} bytes against the regular expression object @var{rex}. +Return true if the text matched, false otherwise. + +The zero--based indices @var{start_pos} (inclusive) and @var{end_pos} +(exclusive) select the substring of @var{text} to be examined. +@var{anchor} selects the anchor point for the matching operation. 
+ +Data about the matching groups is stored in the array @var{match}, which +must have at least @var{nmatch} entries; the referenced substrings are +portions of the @var{text} buffer. If we are only interested in +verifying if the text matches or not (ignoring the matching portions of +text): we can use @cnull{} as @var{match} argument and @math{0} as +@var{nmatch} argument. + +The first element of @var{match} (index @math{0}) references the full +portion of the substring of @var{text} matching the pattern; the second +element of @var{match} (index @math{1}) references the portion of text +matching the first parenthetical subexpression, the third element of +@var{match} (index @math{2}) references the portion of text matching the +second parenthetical subexpression; and so on. +@end deftypefun + + +@deftypefun int cre2_easy_match (const char * @var{pattern}, int @var{pattern_len}, const char * @var{text}, int @var{text_len}, cre2_string_t * @var{match}, int @var{nmatch}) +Like @cfunc{cre2_match} but the pattern is specified as string +@var{pattern} holding @var{pattern_len} bytes. Also the text is fully +matched without anchoring. + +If the text matches the pattern: the return value is @math{1}. If the +text does not match the pattern: the return value is @math{0}. If the +pattern is invalid: the return value is @math{2}. +@end deftypefun + + +@deftp {Struct Typedef} cre2_range_t +Structure type used to represent a substring of the text to be matched +as starting and ending indices. It has the following fields: + +@table @code +@item long start +Inclusive start byte index. + +@item long past +Exclusive end byte index. 
+@end table +@end deftp + + +@deftypefun void cre2_strings_to_ranges (const char * @var{text}, cre2_range_t * @var{ranges}, cre2_string_t * @var{strings}, int @var{nmatch}) +Given an array of @var{strings} with @var{nmatch} elements being the +result of matching @var{text} against a regular expression: fill the +array of @var{ranges} with the index intervals in the @var{text} buffer +representing the same results. +@end deftypefun + +@c page +@node other +@chapter Other matching functions + + +The following functions match a buffer of text against a regular +expression, allowing the extraction of portions of text matching +parenthetical subexpressions. All of them show the following behaviour: + +@itemize +@item +If the text matches the pattern: the return value is @math{1}; if the +text does not match the pattern: the return value is @math{0}. + +@item +If the pattern is invalid: the return value is @math{0}; there is no way +to distinguish this case from the case of text not matching other than +looking at what RE2 prints to @code{stderr}. + +@item +It is impossible to turn off logging of error messages to @code{stderr} +when the specification of the regular expression is invalid. + +@item +Data about the matching groups is stored in the array @var{match}, which +must have at least @var{nmatch} slots; the referenced substrings are +portions of the @var{text} buffer. + +@item +The array @var{match} can have a number of slots between zero (included) +and the number of parenthetical subexpressions in @var{pattern} +(included); if @var{nmatch} is greater than the number of parenthetical +subexpressions: the return value is @math{0}. + +@item +If we are only interested in verifying if the text matches the pattern +or not: we can use @cnull{} as @var{match} argument and @math{0} as +@var{nmatch} argument.
+ +@item +The first slot of @var{match} (index @math{0}) references the portion of +text matching the first parenthetical subexpression; the second slot of +@var{match} (index @math{1}) references the portion of text matching the +second parenthetical subexpression; and so on. +@end itemize + +@noindent +see the documentation of each function for the differences. + +The following example is a successful match: + +@example +const char * pattern = "ci.*ut"; +const char * text = "ciao salut"; +cre2_string_t input = @{ + .data = text, + .length = strlen(text) +@}; +int result; +result = cre2_full_match(pattern, &input, NULL, 0); + +result @result{} 1 +@end example + +@noindent +the following example is a successful match in which the parenthetical +subexpression is ignored: + +@example +const char * pattern = "(ciao) salut"; +const char * text = "ciao salut"; +cre2_string_t input = @{ + .data = text, + .length = strlen(text) +@}; +int result; +result = cre2_full_match(pattern, &input, NULL, 0); + +result @result{} 1 +@end example + +@noindent +the following example is a successful match in which the portion of text +matching the parenthetical subexpression is reported: + +@example +const char * pattern = "(ciao) salut"; +const char * text = "ciao salut"; +cre2_string_t input = @{ + .data = text, + .length = strlen(text) +@}; +int nmatch = 1; +cre2_string_t match[nmatch]; +int result; +result = cre2_full_match(pattern, &input, match, nmatch); + +result @result{} 1 +strncmp(text, input.data, input.length) @result{} 0 +strncmp("ciao", match[0].data, match[0].length) @result{} 0 +@end example + + +@deftypefun int cre2_full_match (const char * @var{pattern}, const cre2_string_t * @var{text}, cre2_string_t * @var{match}, int @var{nmatch}) +@deftypefunx int cre2_full_match_re (cre2_regexp_t * @var{rex}, const cre2_string_t * @var{text}, cre2_string_t * @var{match}, int @var{nmatch}) +Match the zero--terminated string @var{pattern} or the precompiled +regular expression 
@var{rex} against the full buffer @var{text}. + +For example: the text @code{abcdef} matches the pattern @code{abcdef} +according to this function, but neither the pattern @code{abc} nor the +pattern @code{def} will match. +@end deftypefun + + +@deftypefun int cre2_partial_match (const char * @var{pattern}, const cre2_string_t * @var{text}, cre2_string_t * @var{match}, int @var{nmatch}) +@deftypefunx int cre2_partial_match_re (cre2_regexp_t * @var{rex}, const cre2_string_t * @var{text}, cre2_string_t * @var{match}, int @var{nmatch}) +Match the zero--terminated string @var{pattern} or the precompiled +regular expression @var{rex} against the buffer @var{text}, resulting in +success if a substring of @var{text} matches; these functions behave +like the full match ones, but the matching text does not need to be +anchored to the beginning and end. + +For example: the text @code{abcDEFghi} matches the pattern @code{DEF} +according to this function. +@end deftypefun + + +@deftypefun int cre2_consume (const char * @var{pattern}, cre2_string_t * @var{text}, cre2_string_t * @var{match}, int @var{nmatch}) +@deftypefunx int cre2_consume_re (cre2_regexp_t * @var{rex}, cre2_string_t * @var{text}, cre2_string_t * @var{match}, int @var{nmatch}) +Match the zero--terminated string @var{pattern} or the precompiled +regular expression @var{rex} against the buffer @var{text}, resulting in +success if the prefix of @var{text} matches. The data structure +referenced by @var{text} is mutated to reference text right after the +last byte that matched the pattern. + +For example: the text @code{abcDEF} matches the pattern @code{abc} +according to this function; after the call @var{text} will reference the +text @code{DEF}. 
+@end deftypefun + + +@deftypefun int cre2_find_and_consume (const char * @var{pattern}, cre2_string_t * @var{text}, cre2_string_t * @var{match}, int @var{nmatch}) +@deftypefunx int cre2_find_and_consume_re (cre2_regexp_t * @var{rex}, cre2_string_t * @var{text}, cre2_string_t * @var{match}, int @var{nmatch}) +Match the zero--terminated string @var{pattern} or the precompiled +regular expression @var{rex} against the buffer @var{text}, resulting in +success if, after skipping a non--matching prefix in @var{text}, a +substring of @var{text} matches. The data structure referenced by +@var{text} is mutated to reference text right after the last byte that +matched the pattern. + +For example: the text @code{abcDEFghi} matches the pattern @code{DEF} +according to this function; the prefix @code{abc} is skipped; after the +call @var{text} will reference the text @code{ghi}. +@end deftypefun + +@c page +@node tips +@chapter Tips for using the regexp syntax + + +@menu +* tips dot:: Matching newlines with the + @code{.} subpattern. +@end menu + +@c page +@node tips dot +@section Matching newlines with the @code{.} subpattern + + +By default the dot subpattern @code{.} matches any character but +newlines; to enable newline matching we have to enable the @code{s} flag +using the special subpattern @samp{(?<flags>)} or +@samp{(?<flags>:<re>)}, where @code{<flags>} is a sequence of +characters, one character for each flag, and @code{<re>} is a regexp +subpattern. Notice that the parentheses in @code{(?<flags>:<re>)} are +non--capturing. + + So let's consider the text @code{ciao\nhello}: + +@itemize +@item +The regexp @code{ciao.hello} does @strong{not} match because @code{s} is +disabled. + +@item +The regexp @code{(?s)ciao.hello} matches because the subpattern +@code{(?s)} has enabled flag @code{s} for the rest of the pattern, +including the dot. + +@item +The regexp @code{ciao(?s).hello} matches because the subpattern +@code{(?s)} has enabled flag @code{s} for the rest of the pattern, +including the dot.
+ +@item +The regexp @code{ciao(?s:.)hello} matches because the subpattern +@code{(?s:.)} has enabled flag @code{s} for the subpattern @code{.} +which is the dot. +@end itemize + +@c page +@node Package License +@appendix Package license + + +Copyright @copyright{} 2012 Marco Maggi @value{AUTHOR_URL}@* +Copyright @copyright{} 2011 Keegan McAllister @url{http://github.com/kmcallister/}@* +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +@enumerate +@item +Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +@item +Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +@item +Neither the name of the author nor the names of his contributors may be +used to endorse or promote products derived from this software without +specific prior written permission. +@end enumerate + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS +IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +@c page + +@include fdl-1.3.texi + +@c page +@node references +@appendix Bibliography and references + + + +@c page +@node concept index +@appendix An entry for each concept + +@printindex cp + +@node function index +@appendix An entry for each function. + +@printindex fn + +@node variable index +@appendix An entry for each variable. + +@printindex vr + +@node type index +@appendix An entry for each type. + +@printindex tp + +@contents +@bye + +@c end of file diff --git a/outside/cre2/src/doc/fdl-1.3.texi b/outside/cre2/src/doc/fdl-1.3.texi new file mode 100644 index 0000000000..1a7835ec59 --- /dev/null +++ b/outside/cre2/src/doc/fdl-1.3.texi @@ -0,0 +1,509 @@ +@node Documentation License +@appendix GNU Free Documentation License + +@cindex FDL, GNU Free Documentation License +@center Version 1.3, 3 November 2008 + +@c This file is intended to be included within another document, +@c hence no sectioning command or @node. + +@display +Copyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. +@uref{http://fsf.org/} + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. +@end display + +@enumerate 0 +@item +PREAMBLE + +The purpose of this License is to make a manual, textbook, or other +functional and useful document @dfn{free} in the sense of freedom: to +assure everyone the effective freedom to copy and redistribute it, +with or without modifying it, either commercially or noncommercially. +Secondarily, this License preserves for the author and publisher a way +to get credit for their work, while not being considered responsible +for modifications made by others. + +This License is a kind of ``copyleft'', which means that derivative +works of the document must themselves be free in the same sense. It +complements the GNU General Public License, which is a copyleft +license designed for free software. 
+ +We have designed this License in order to use it for manuals for free +software, because free software needs free documentation: a free +program should come with manuals providing the same freedoms that the +software does. But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. + +@item +APPLICABILITY AND DEFINITIONS + +This License applies to any manual or other work, in any medium, that +contains a notice placed by the copyright holder saying it can be +distributed under the terms of this License. Such a notice grants a +world-wide, royalty-free license, unlimited in duration, to use that +work under the conditions stated herein. The ``Document'', below, +refers to any such manual or work. Any member of the public is a +licensee, and is addressed as ``you''. You accept the license if you +copy, modify or distribute the work in a way requiring permission +under copyright law. + +A ``Modified Version'' of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. + +A ``Secondary Section'' is a named appendix or a front-matter section +of the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall +subject (or to related matters) and contains nothing that could fall +directly within that overall subject. (Thus, if the Document is in +part a textbook of mathematics, a Secondary Section may not explain +any mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. 
+ +The ``Invariant Sections'' are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. If a +section does not fit the above definition of Secondary then it is not +allowed to be designated as Invariant. The Document may contain zero +Invariant Sections. If the Document does not identify any Invariant +Sections then there are none. + +The ``Cover Texts'' are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. A Front-Cover Text may +be at most 5 words, and a Back-Cover Text may be at most 25 words. + +A ``Transparent'' copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, that is suitable for revising the document +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup, or absence of markup, has been arranged to thwart +or discourage subsequent modification by readers is not Transparent. +An image format is not Transparent if used for any substantial amount +of text. A copy that is not ``Transparent'' is called ``Opaque''. + +Examples of suitable formats for Transparent copies include plain +@sc{ascii} without markup, Texinfo input format, La@TeX{} input +format, @acronym{SGML} or @acronym{XML} using a publicly available +@acronym{DTD}, and standard-conforming simple @acronym{HTML}, +PostScript or @acronym{PDF} designed for human modification. Examples +of transparent image formats include @acronym{PNG}, @acronym{XCF} and +@acronym{JPG}. 
Opaque formats include proprietary formats that can be +read and edited only by proprietary word processors, @acronym{SGML} or +@acronym{XML} for which the @acronym{DTD} and/or processing tools are +not generally available, and the machine-generated @acronym{HTML}, +PostScript or @acronym{PDF} produced by some word processors for +output purposes only. + +The ``Title Page'' means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. For works in +formats which do not have any title page as such, ``Title Page'' means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. + +The ``publisher'' means any person or entity that distributes copies +of the Document to the public. + +A section ``Entitled XYZ'' means a named subunit of the Document whose +title either is precisely XYZ or contains XYZ in parentheses following +text that translates XYZ in another language. (Here XYZ stands for a +specific section name mentioned below, such as ``Acknowledgements'', +``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title'' +of such a section when you modify the Document means that it remains a +section ``Entitled XYZ'' according to this definition. + +The Document may include Warranty Disclaimers next to the notice which +states that this License applies to the Document. These Warranty +Disclaimers are considered to be included by reference in this +License, but only as regards disclaiming warranties: any other +implication that these Warranty Disclaimers may have is void and has +no effect on the meaning of this License. 
+ +@item +VERBATIM COPYING + +You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. + +You may also lend copies, under the same conditions stated above, and +you may publicly display copies. + +@item +COPYING IN QUANTITY + +If you publish printed copies (or copies in media that commonly have +printed covers) of the Document, numbering more than 100, and the +Document's license notice requires Cover Texts, you must enclose the +copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. + +If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. 
+ +If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a computer-network location from which the general network-using +public has access to download using public-standard network protocols +a complete Transparent copy of the Document, free of added material. +If you use the latter option, you must take reasonably prudent steps, +when you begin distribution of Opaque copies in quantity, to ensure +that this Transparent copy will remain thus accessible at the stated +location until at least one year after the last time you distribute an +Opaque copy (directly or through your agents or retailers) of that +edition to the public. + +It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. + +@item +MODIFICATIONS + +You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: + +@enumerate A +@item +Use in the Title Page (and on the covers, if any) a title distinct +from that of the Document, and from those of previous versions +(which should, if there were any, be listed in the History section +of the Document). You may use the same title as a previous version +if the original publisher of that version gives permission. 
+ +@item +List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified +Version, together with at least five of the principal authors of the +Document (all of its principal authors, if it has fewer than five), +unless they release you from this requirement. + +@item +State on the Title page the name of the publisher of the +Modified Version, as the publisher. + +@item +Preserve all the copyright notices of the Document. + +@item +Add an appropriate copyright notice for your modifications +adjacent to the other copyright notices. + +@item +Include, immediately after the copyright notices, a license notice +giving the public permission to use the Modified Version under the +terms of this License, in the form shown in the Addendum below. + +@item +Preserve in that license notice the full lists of Invariant Sections +and required Cover Texts given in the Document's license notice. + +@item +Include an unaltered copy of this License. + +@item +Preserve the section Entitled ``History'', Preserve its Title, and add +to it an item stating at least the title, year, new authors, and +publisher of the Modified Version as given on the Title Page. If +there is no section Entitled ``History'' in the Document, create one +stating the title, year, authors, and publisher of the Document as +given on its Title Page, then add an item describing the Modified +Version as stated in the previous sentence. + +@item +Preserve the network location, if any, given in the Document for +public access to a Transparent copy of the Document, and likewise +the network locations given in the Document for previous versions +it was based on. These may be placed in the ``History'' section. +You may omit a network location for a work that was published at +least four years before the Document itself, or if the original +publisher of the version it refers to gives permission. 
+ +@item +For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve +the Title of the section, and preserve in the section all the +substance and tone of each of the contributor acknowledgements and/or +dedications given therein. + +@item +Preserve all the Invariant Sections of the Document, +unaltered in their text and in their titles. Section numbers +or the equivalent are not considered part of the section titles. + +@item +Delete any section Entitled ``Endorsements''. Such a section +may not be included in the Modified Version. + +@item +Do not retitle any existing section to be Entitled ``Endorsements'' or +to conflict in title with any Invariant Section. + +@item +Preserve any Warranty Disclaimers. +@end enumerate + +If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. + +You may add a section Entitled ``Endorsements'', provided it contains +nothing but endorsements of your Modified Version by various +parties---for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. + +You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. 
If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. + +The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. + +@item +COMBINING DOCUMENTS + +You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice, and that you preserve all their Warranty Disclaimers. + +The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. + +In the combination, you must combine any sections Entitled ``History'' +in the various original documents, forming one section Entitled +``History''; likewise combine any sections Entitled ``Acknowledgements'', +and any sections Entitled ``Dedications''. 
You must delete all +sections Entitled ``Endorsements.'' + +@item +COLLECTIONS OF DOCUMENTS + +You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. + +You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. + +@item +AGGREGATION WITH INDEPENDENT WORKS + +A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, is called an ``aggregate'' if the copyright +resulting from the compilation is not used to limit the legal rights +of the compilation's users beyond what the individual works permit. +When the Document is included in an aggregate, this License does not +apply to the other works in the aggregate which are not themselves +derivative works of the Document. + +If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one half of +the entire aggregate, the Document's Cover Texts may be placed on +covers that bracket the Document within the aggregate, or the +electronic equivalent of covers if the Document is in electronic form. +Otherwise they must appear on printed covers that bracket the whole +aggregate. + +@item +TRANSLATION + +Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. 
+Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License, and all the license notices in the +Document, and any Warranty Disclaimers, provided that you also include +the original English version of this License and the original versions +of those notices and disclaimers. In case of a disagreement between +the translation and the original version of this License or a notice +or disclaimer, the original version will prevail. + +If a section in the Document is Entitled ``Acknowledgements'', +``Dedications'', or ``History'', the requirement (section 4) to Preserve +its Title (section 1) will typically require changing the actual +title. + +@item +TERMINATION + +You may not copy, modify, sublicense, or distribute the Document +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense, or distribute it is void, and +will automatically terminate your rights under this License. + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, receipt of a copy of some or all of the same material does +not give you any rights to use it. + +@item +FUTURE REVISIONS OF THIS LICENSE + +The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +@uref{http://www.gnu.org/copyleft/}. + +Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License ``or any later version'' applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. If the Document +specifies that a proxy can decide which future versions of this +License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the +Document. + +@item +RELICENSING + +``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any +World Wide Web server that publishes copyrightable works and also +provides prominent facilities for anybody to edit those works. A +public wiki that anybody can edit is an example of such a server. A +``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the +site means any set of copyrightable works thus published on the MMC +site. 
+ +``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0 +license published by Creative Commons Corporation, a not-for-profit +corporation with a principal place of business in San Francisco, +California, as well as future copyleft versions of that license +published by that same organization. + +``Incorporate'' means to publish or republish a Document, in whole or +in part, as part of another Document. + +An MMC is ``eligible for relicensing'' if it is licensed under this +License, and if all works that were first published under this License +somewhere other than this MMC, and subsequently incorporated in whole +or in part into the MMC, (1) had no cover texts or invariant sections, +and (2) were thus incorporated prior to November 1, 2008. + +The operator of an MMC Site may republish an MMC contained in the site +under CC-BY-SA on the same site at any time before August 1, 2009, +provided the MMC is eligible for relicensing. + +@end enumerate + +@page +@heading ADDENDUM: How to use this License for your documents + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: + +@smallexample +@group + Copyright (C) @var{year} @var{your name}. + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. 
+@end group +@end smallexample + +If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, +replace the ``with@dots{}Texts.'' line with this: + +@smallexample +@group + with the Invariant Sections being @var{list their titles}, with + the Front-Cover Texts being @var{list}, and with the Back-Cover Texts + being @var{list}. +@end group +@end smallexample + +If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + +If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. + +@c Local Variables: +@c ispell-local-pdict: "ispell-dict" +@c End: + diff --git a/outside/cre2/src/prepare.sh b/outside/cre2/src/prepare.sh new file mode 100644 index 0000000000..74afa769b5 --- /dev/null +++ b/outside/cre2/src/prepare.sh @@ -0,0 +1,10 @@ +# prepare.sh -- +# +# Run this to rebuild the infrastructure and configure. + +set -xe + +(cd .. && sh autogen.sh) +sh ../configure.sh + +### end of file diff --git a/outside/cre2/src/src/cre2.cpp b/outside/cre2/src/src/cre2.cpp new file mode 100644 index 0000000000..17819f6add --- /dev/null +++ b/outside/cre2/src/src/cre2.cpp @@ -0,0 +1,631 @@ +/* + Source file for CRE2, a C language wrapper for RE2: a regular + expressions library by Google. + + Copyright (c) 2012 Marco Maggi + Copyright (c) 2011 Keegan McAllister + All rights reserved. + + For the license notice see the COPYING file. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif +#include +#include "cre2.h" + +#include +#include + + +/** -------------------------------------------------------------------- + ** Version functions. 
+ ** ----------------------------------------------------------------- */ + +const char * +cre2_version_string (void) +{ + return cre2_VERSION_INTERFACE_STRING; +} +int +cre2_version_interface_current (void) +{ + return cre2_VERSION_INTERFACE_CURRENT; +} +int +cre2_version_interface_revision (void) +{ + return cre2_VERSION_INTERFACE_REVISION; +} +int +cre2_version_interface_age (void) +{ + return cre2_VERSION_INTERFACE_AGE; +} + + +/** -------------------------------------------------------------------- + ** Options objects. + ** ----------------------------------------------------------------- */ + +/* Cast the pointer argument "opt" to a pointer of type + "RE2::Options*". */ +#define TO_OPT(opt) (reinterpret_cast(opt)) + +cre2_options_t * +cre2_opt_new(void) +/* Allocate and return a new options object. */ +{ + // FIXME: is this use of "nothrow" good to avoid raising exceptions + // when memory allocation fails and to return NULL instead? + return reinterpret_cast(new (std::nothrow) RE2::Options()); +} +void +cre2_opt_delete (cre2_options_t *opt) +/* Finalise an options object. */ +{ + delete TO_OPT(opt); +} + +/* Set or unset option flags in an options object. */ +#define OPT_BOOL(name) \ + void cre2_opt_set_##name (cre2_options_t *opt, int flag) \ + { \ + TO_OPT(opt)->set_##name(bool(flag)); \ + } \ + int cre2_opt_##name (cre2_options_t *opt) \ + { \ + return TO_OPT(opt)->name(); \ + } +OPT_BOOL(posix_syntax) +OPT_BOOL(longest_match) +OPT_BOOL(log_errors) +OPT_BOOL(literal) +OPT_BOOL(never_nl) +OPT_BOOL(case_sensitive) +OPT_BOOL(perl_classes) +OPT_BOOL(word_boundary) +OPT_BOOL(one_line) +#undef OPT_BOOL + +void +cre2_opt_set_encoding (cre2_options_t *opt, cre2_encoding_t enc) +/* Select the encoding in an options object. 
*/ +{ + switch (enc) { + case CRE2_UTF8: + TO_OPT(opt)->set_encoding(RE2::Options::EncodingUTF8); + break; + case CRE2_Latin1: + TO_OPT(opt)->set_encoding(RE2::Options::EncodingLatin1); + break; + default: + fprintf(stderr, "CRE2: internal error: unknown encoding %d\n", enc); + exit(EXIT_FAILURE); + } +} +cre2_encoding_t +cre2_opt_encoding (cre2_options_t *opt) +{ + RE2::Options::Encoding E = TO_OPT(opt)->encoding(); + switch (E) { + case RE2::Options::EncodingUTF8: + return CRE2_UTF8; + case RE2::Options::EncodingLatin1: + return CRE2_Latin1; + default: + return CRE2_UNKNOWN; + } +} +void +cre2_opt_set_max_mem (cre2_options_t *opt, int m) +/* Configure the maximum amount of memory in an options object. */ +{ + TO_OPT(opt)->set_max_mem(m); +} +int +cre2_opt_max_mem (cre2_options_t *opt) +{ + return TO_OPT(opt)->max_mem(); +} + + +/** -------------------------------------------------------------------- + ** Precompiled regular expressions objects. + ** ----------------------------------------------------------------- */ + +#define TO_RE2(re) (reinterpret_cast(re)) +#define TO_CONST_RE2(re) (reinterpret_cast(re)) + +cre2_regexp_t * +cre2_new (const char *pattern, int pattern_len, const cre2_options_t *opt) +{ + re2::StringPiece pattern_re2(pattern, pattern_len); + if (opt) { + // FIXME: is this use of "nothrow" enough to avoid raising + // exceptions when memory allocation fails and to return NULL + // instead? 
+ return reinterpret_cast + (new (std::nothrow) RE2(pattern_re2, *reinterpret_cast(opt))); + } else { + return reinterpret_cast (new (std::nothrow) RE2(pattern_re2)); + } +} +void +cre2_delete (cre2_regexp_t *re) +{ + delete TO_RE2(re); +} +const char * +cre2_pattern (const cre2_regexp_t *re) +{ + return TO_CONST_RE2(re)->pattern().c_str(); +} +int +cre2_error_code (const cre2_regexp_t *re) +{ + return int(TO_CONST_RE2(re)->error_code()); +} +const char * +cre2_error_string (const cre2_regexp_t *re) +{ + return TO_CONST_RE2(re)->error().c_str(); +} +void +cre2_error_arg (const cre2_regexp_t *re, cre2_string_t *arg) +{ + const std::string &argstr = TO_CONST_RE2(re)->error_arg(); + arg->data = argstr.data(); + arg->length = argstr.length(); +} +int +cre2_num_capturing_groups (const cre2_regexp_t *re) +{ + return TO_CONST_RE2(re)->NumberOfCapturingGroups(); +} +int +cre2_program_size (const cre2_regexp_t *re) +{ + return TO_CONST_RE2(re)->ProgramSize(); +} + + +/** -------------------------------------------------------------------- + ** Matching with precompiled regular expressions objects. 
+ ** ----------------------------------------------------------------- */ + +int +cre2_match (const cre2_regexp_t *re , const char *text, + int textlen, int startpos, int endpos, cre2_anchor_t anchor, + cre2_string_t *match, int nmatch) +{ + re2::StringPiece text_re2(text, textlen); + re2::StringPiece *match_re2; + RE2::Anchor anchor_re2 = RE2::UNANCHORED; + bool retval; // 0 for no match + // 1 for successful matching + match_re2 = (re2::StringPiece *)malloc(sizeof(re2::StringPiece) * nmatch); + switch (anchor) { + case CRE2_ANCHOR_START: + anchor_re2 = RE2::ANCHOR_START; + break; + case CRE2_ANCHOR_BOTH: + anchor_re2 = RE2::ANCHOR_BOTH; + break; + case CRE2_UNANCHORED: + break; + } + retval = TO_CONST_RE2(re)->Match(text_re2, startpos, endpos, anchor_re2, match_re2, nmatch); + if (retval) { + for (int i=0; idata, text->length); \ + re2::StringPiece *strv; \ + RE2::Arg *argv; \ + RE2::Arg * *args; \ + bool retval; \ + strv = (re2::StringPiece *) (malloc(sizeof(re2::StringPiece) *nmatch)); \ + argv = (RE2::Arg *) (malloc(sizeof(RE2::Arg) *nmatch)); \ + args = (RE2::Arg **) (malloc(sizeof(RE2::Arg *) *nmatch)); \ + for (int i=0; idata, text->length); \ + re2::StringPiece *strv; \ + RE2::Arg *argv; \ + RE2::Arg * *args; \ + bool retval; \ + strv = (re2::StringPiece *) (malloc(sizeof(re2::StringPiece) *nmatch)); \ + argv = (RE2::Arg *) (malloc(sizeof(RE2::Arg) *nmatch)); \ + args = (RE2::Arg **) (malloc(sizeof(RE2::Arg *) *nmatch)); \ + for (int i=0; idata = input.data(); \ + text->length = input.length(); \ + for (int i=0; idata, text->length); \ + re2::StringPiece *strv; \ + RE2::Arg *argv; \ + RE2::Arg * *args; \ + bool retval; \ + strv = (re2::StringPiece *) (malloc(sizeof(re2::StringPiece) *nmatch)); \ + argv = (RE2::Arg *) (malloc(sizeof(RE2::Arg) *nmatch)); \ + args = (RE2::Arg **) (malloc(sizeof(RE2::Arg *) *nmatch)); \ + for (int i=0; idata, text->length); \ + re2::StringPiece *strv; \ + RE2::Arg *argv; \ + RE2::Arg * *args; \ + bool retval; \ + strv = 
(re2::StringPiece *) (malloc(sizeof(re2::StringPiece) *nmatch)); \ + argv = (RE2::Arg *) (malloc(sizeof(RE2::Arg) *nmatch)); \ + args = (RE2::Arg **) (malloc(sizeof(RE2::Arg *) *nmatch)); \ + for (int i=0; idata = input.data(); \ + text->length = input.length(); \ + for (int i=0; idata, text_and_target->length); + re2::StringPiece R(rewrite->data, rewrite->length); + char * buffer; /* this exists to make GCC shut up about const */ + bool retval; + retval = RE2::Replace(&S, pattern, R); + text_and_target->length = S.length(); + buffer = (char *)malloc(1+text_and_target->length); + if (buffer) { + S.copy(buffer, text_and_target->length); + buffer[text_and_target->length] = '\0'; + text_and_target->data = buffer; + } else + return -1; + return int(retval); + } catch(const std::exception &e) { + // e.what(); + return -1; + } catch(...) { + return -1; + } +} +int +cre2_replace_re (cre2_regexp_t * rex, cre2_string_t * text_and_target, cre2_string_t * rewrite) +{ + std::string S(text_and_target->data, text_and_target->length); + re2::StringPiece R(rewrite->data, rewrite->length); + char * buffer; /* this exists to make GCC shut up about const */ + bool retval; + retval = RE2::Replace(&S, *TO_RE2(rex), R); + text_and_target->length = S.length(); + buffer = (char *)malloc(1+text_and_target->length); + if (buffer) { + S.copy(buffer, text_and_target->length); + buffer[text_and_target->length] = '\0'; + text_and_target->data = buffer; + } else + return -1; + return int(retval); +} + +/* ------------------------------------------------------------------ */ + +int +cre2_global_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string_t * rewrite) +{ + std::string S(text_and_target->data, text_and_target->length); + re2::StringPiece R(rewrite->data, rewrite->length); + char * buffer; /* this exists to make GCC shut up about const */ + int retval; + retval = RE2::GlobalReplace(&S, pattern, R); + text_and_target->length = S.length(); + buffer = (char 
*)malloc(1+text_and_target->length); + if (buffer) { + S.copy(buffer, text_and_target->length); + buffer[text_and_target->length] = '\0'; + text_and_target->data = buffer; + } else + return -1; + return int(retval); +} +int +cre2_global_replace_re (cre2_regexp_t * rex, cre2_string_t * text_and_target, cre2_string_t * rewrite) +{ + std::string S(text_and_target->data, text_and_target->length); + re2::StringPiece R(rewrite->data, rewrite->length); + char * buffer; /* this exists to make GCC shut up about const */ + int retval; + retval = RE2::GlobalReplace(&S, *TO_RE2(rex), R); + text_and_target->length = S.length(); + buffer = (char *)malloc(1+text_and_target->length); + if (buffer) { + S.copy(buffer, text_and_target->length); + buffer[text_and_target->length] = '\0'; + text_and_target->data = buffer; + } else + return -1; + return retval; +} + +/* ------------------------------------------------------------------ */ + +int +cre2_extract (const char * pattern, cre2_string_t * text, + cre2_string_t * rewrite, cre2_string_t * target) +{ + re2::StringPiece T(text->data, text->length); + re2::StringPiece R(rewrite->data, rewrite->length); + std::string O; + char * buffer; /* this exists to make GCC shut up about const */ + bool retval; + retval = RE2::Extract(T, pattern, R, &O); + target->length = O.length(); + buffer = (char *)malloc(1+target->length); + if (buffer) { + O.copy(buffer, target->length); + buffer[target->length] = '\0'; + target->data = buffer; + } else + return -1; + return int(retval); +} +int +cre2_extract_re (cre2_regexp_t * rex, cre2_string_t * text, + cre2_string_t * rewrite, cre2_string_t * target) +{ + re2::StringPiece T(text->data, text->length); + re2::StringPiece R(rewrite->data, rewrite->length); + std::string O; + char * buffer; /* this exists to make GCC shut up about const */ + bool retval; + retval = RE2::Extract(T, *TO_RE2(rex), R, &O); + target->length = O.length(); + buffer = (char *)malloc(1+target->length); + if (buffer) { + 
O.copy(buffer, target->length); + buffer[target->length] = '\0'; + target->data = buffer; + } else + return -1; + return int(retval); +} + +/* ------------------------------------------------------------------ */ + +int +cre2_quote_meta (cre2_string_t * quoted, cre2_string_t * original) +{ + re2::StringPiece O(original->data, original->length); + std::string Q; + char * buffer; /* this exists to make GCC shut up about const */ + Q = RE2::QuoteMeta(O); + quoted->length = Q.length(); + buffer = (char *)malloc(1+quoted->length); + if (buffer) { + Q.copy(buffer, quoted->length); + buffer[quoted->length] = '\0'; + quoted->data = buffer; + return 0; + } else + return -1; +} +int +cre2_possible_match_range (cre2_regexp_t * rex, + cre2_string_t * min_, cre2_string_t * max_, int maxlen) +{ + std::string MIN, MAX; + cre2_string_t min, max; + char * buffer; /* this exists to make GCC shut up about const */ + bool retval; + retval = TO_RE2(rex)->PossibleMatchRange(&MIN, &MAX, maxlen); + if (retval) { + /* copy MIN */ + min.length = MIN.length(); + buffer = (char *)malloc(1+min.length); + if (buffer) { + MIN.copy(buffer, min.length); + buffer[min.length] = '\0'; + min.data = buffer; + } else + return -1; + /* copy MAX */ + max.length = MAX.length(); + buffer = (char *)malloc(1+max.length); + if (buffer) { + MAX.copy(buffer, max.length); + buffer[max.length] = '\0'; + max.data = buffer; + } else { + free((void *)min.data); + min.data = NULL; + return -1; + } + *min_ = min; + *max_ = max; + return 1; + } else + return 0; +} +int +cre2_check_rewrite_string (cre2_regexp_t * rex, cre2_string_t * rewrite, cre2_string_t * errmsg) +{ + re2::StringPiece R(rewrite->data, rewrite->length); + std::string E; + char * buffer; /* this exists to make GCC shut up about const */ + bool retval; + retval = TO_RE2(rex)->CheckRewriteString(R, &E); + if (retval) { + errmsg->data = NULL; + errmsg->length = 0; + return 1; + } else { + errmsg->length = E.length(); + buffer = (char 
*)malloc(1+errmsg->length); + if (buffer) { + E.copy(buffer, errmsg->length); + buffer[errmsg->length] = '\0'; + errmsg->data = buffer; + } else + return -1; + return 0; + } +} + +/* end of file */ diff --git a/outside/cre2/src/src/cre2.h b/outside/cre2/src/src/cre2.h new file mode 100644 index 0000000000..be17ac1af1 --- /dev/null +++ b/outside/cre2/src/src/cre2.h @@ -0,0 +1,299 @@ +/* + Header file for CRE2, a C language wrapper for RE2: a regular + expressions library by Google. + + Copyright (c) 2012 Marco Maggi + Copyright (c) 2011 Keegan McAllister + All rights reserved. + + For the license notice see the COPYING file. +*/ + + +/** -------------------------------------------------------------------- + ** Headers. + ** ----------------------------------------------------------------- */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef cre2_decl +# define cre2_decl extern +#endif + + +/** -------------------------------------------------------------------- + ** Version functions. + ** ----------------------------------------------------------------- */ + +cre2_decl const char * cre2_version_string (void); +cre2_decl int cre2_version_interface_current (void); +cre2_decl int cre2_version_interface_revision (void); +cre2_decl int cre2_version_interface_age (void); + + +/** -------------------------------------------------------------------- + ** Regular expressions configuration options. 
+ ** ----------------------------------------------------------------- */ + +typedef void cre2_options_t; + +typedef enum cre2_encoding_t { + CRE2_UNKNOWN = 0, /* should never happen */ + CRE2_UTF8 = 1, + CRE2_Latin1 = 2 +} cre2_encoding_t; + +cre2_decl cre2_options_t *cre2_opt_new (void); +cre2_decl void cre2_opt_delete (cre2_options_t *opt); + +cre2_decl void cre2_opt_set_posix_syntax (cre2_options_t *opt, int flag); +cre2_decl void cre2_opt_set_longest_match (cre2_options_t *opt, int flag); +cre2_decl void cre2_opt_set_log_errors (cre2_options_t *opt, int flag); +cre2_decl void cre2_opt_set_literal (cre2_options_t *opt, int flag); +cre2_decl void cre2_opt_set_never_nl (cre2_options_t *opt, int flag); +cre2_decl void cre2_opt_set_case_sensitive (cre2_options_t *opt, int flag); +cre2_decl void cre2_opt_set_perl_classes (cre2_options_t *opt, int flag); +cre2_decl void cre2_opt_set_word_boundary (cre2_options_t *opt, int flag); +cre2_decl void cre2_opt_set_one_line (cre2_options_t *opt, int flag); +cre2_decl void cre2_opt_set_max_mem (cre2_options_t *opt, int m); +cre2_decl void cre2_opt_set_encoding (cre2_options_t *opt, cre2_encoding_t enc); + +cre2_decl int cre2_opt_posix_syntax (cre2_options_t *opt); +cre2_decl int cre2_opt_longest_match (cre2_options_t *opt); +cre2_decl int cre2_opt_log_errors (cre2_options_t *opt); +cre2_decl int cre2_opt_literal (cre2_options_t *opt); +cre2_decl int cre2_opt_never_nl (cre2_options_t *opt); +cre2_decl int cre2_opt_case_sensitive (cre2_options_t *opt); +cre2_decl int cre2_opt_perl_classes (cre2_options_t *opt); +cre2_decl int cre2_opt_word_boundary (cre2_options_t *opt); +cre2_decl int cre2_opt_one_line (cre2_options_t *opt); +cre2_decl int cre2_opt_max_mem (cre2_options_t *opt); +cre2_decl cre2_encoding_t cre2_opt_encoding (cre2_options_t *opt); + + +/** -------------------------------------------------------------------- + ** Precompiled regular expressions. 
+ ** ----------------------------------------------------------------- */ + +typedef struct cre2_string_t { + const char * data; + int length; +} cre2_string_t; + +typedef void cre2_regexp_t; + +/* This definition must be kept in sync with the definition of "enum + ErrorCode" in the file "re2.h" of the original RE2 distribution. */ +typedef enum cre2_error_code_t { + CRE2_NO_ERROR = 0, + CRE2_ERROR_INTERNAL, /* unexpected error */ + /* parse errors */ + CRE2_ERROR_BAD_ESCAPE, /* bad escape sequence */ + CRE2_ERROR_BAD_CHAR_CLASS, /* bad character class */ + CRE2_ERROR_BAD_CHAR_RANGE, /* bad character class range */ + CRE2_ERROR_MISSING_BRACKET, /* missing closing ] */ + CRE2_ERROR_MISSING_PAREN, /* missing closing ) */ + CRE2_ERROR_TRAILING_BACKSLASH,/* trailing \ at end of regexp */ + CRE2_ERROR_REPEAT_ARGUMENT, /* repeat argument missing, e.g. "*" */ + CRE2_ERROR_REPEAT_SIZE, /* bad repetition argument */ + CRE2_ERROR_REPEA_TOP, /* bad repetition operator */ + CRE2_ERROR_BAD_PERL_OP, /* bad perl operator */ + CRE2_ERROR_BAD_UTF8, /* invalid UTF-8 in regexp */ + CRE2_ERROR_BAD_NAMED_CAPTURE, /* bad named capture group */ + CRE2_ERROR_PATTERN_TOO_LARGE, /* pattern too large (compile failed) */ +} cre2_error_code_t; + +/* construction and destruction */ +cre2_decl cre2_regexp_t * cre2_new (const char *pattern, int pattern_len, + const cre2_options_t *opt); +cre2_decl void cre2_delete (cre2_regexp_t *re); + +/* regular expression inspection */ +cre2_decl const char * cre2_pattern (const cre2_regexp_t *re); +cre2_decl int cre2_error_code (const cre2_regexp_t *re); +cre2_decl int cre2_num_capturing_groups (const cre2_regexp_t *re); +cre2_decl int cre2_program_size (const cre2_regexp_t *re); + +/* invalidated by further re use */ +cre2_decl const char *cre2_error_string(const cre2_regexp_t *re); +cre2_decl void cre2_error_arg(const cre2_regexp_t *re, cre2_string_t * arg); + + +/** -------------------------------------------------------------------- + ** Main matching 
functions. + ** ----------------------------------------------------------------- */ + +typedef enum cre2_anchor_t { + CRE2_UNANCHORED = 1, + CRE2_ANCHOR_START = 2, + CRE2_ANCHOR_BOTH = 3 +} cre2_anchor_t; + +typedef struct cre2_range_t { + long start; /* inclusive start index for bytevector */ + long past; /* exclusive end index for bytevector */ +} cre2_range_t; + +cre2_decl int cre2_match (const cre2_regexp_t * re, + const char * text, int textlen, + int startpos, int endpos, cre2_anchor_t anchor, + cre2_string_t * match, int nmatch); + +cre2_decl int cre2_easy_match (const char * pattern, int pattern_len, + const char * text, int text_len, + cre2_string_t * match, int nmatch); + +cre2_decl void cre2_strings_to_ranges (const char * text, cre2_range_t * ranges, + cre2_string_t * strings, int nmatch); + + +/** -------------------------------------------------------------------- + ** Other matching functions. + ** ----------------------------------------------------------------- */ + +typedef int cre2_match_stringz_fun_t (const char * pattern, const cre2_string_t * text, + cre2_string_t * match, int nmatch); + +typedef int cre2_match_stringz2_fun_t (const char * pattern, cre2_string_t * text, + cre2_string_t * match, int nmatch); + +typedef int cre2_match_rex_fun_t (cre2_regexp_t * rex, const cre2_string_t * text, + cre2_string_t * match, int nmatch); + +typedef int cre2_match_rex2_fun_t (cre2_regexp_t * rex, cre2_string_t * text, + cre2_string_t * match, int nmatch); + +cre2_decl cre2_match_stringz_fun_t cre2_full_match; +cre2_decl cre2_match_stringz_fun_t cre2_partial_match; +cre2_decl cre2_match_stringz2_fun_t cre2_consume; +cre2_decl cre2_match_stringz2_fun_t cre2_find_and_consume; + +cre2_decl cre2_match_rex_fun_t cre2_full_match_re; +cre2_decl cre2_match_rex_fun_t cre2_partial_match_re; +cre2_decl cre2_match_rex2_fun_t cre2_consume_re; +cre2_decl cre2_match_rex2_fun_t cre2_find_and_consume_re; + + +/** 
-------------------------------------------------------------------- + ** Problematic functions. + ** ----------------------------------------------------------------- */ + +/* Match the text in the buffer "text_and_target" against the rex in + "pattern" or "rex". Mutate "text_and_target" so that it references a + malloc'ed buffer holding the original text in which the first, and + only the first, match is substituted with the text in "rewrite". + Numeric backslash sequences (\1 to \9) in "rewrite" are substituted + with the portions of text matching the corresponding parenthetical + subexpressions. + + Return 0 if no match, 1 if successful match, -1 if error allocating + memory. */ +cre2_decl int cre2_replace (const char * pattern, + cre2_string_t * text_and_target, + cre2_string_t * rewrite); +cre2_decl int cre2_replace_re (cre2_regexp_t * rex, + cre2_string_t * text_and_target, + cre2_string_t * rewrite); + +/* Match the text in the buffer "text_and_target" against the rex in + "pattern" or "rex". Mutate "text_and_target" so that it references a + malloc'ed buffer holding the original text in which all the + matching substrings are substituted with the text in "rewrite". + Numeric backslash sequences (\1 to \9) in "rewrite" are substituted + with the portions of text matching the corresponding parenthetical + subexpressions. + + Return 0 if no match, positive integer representing the number of + substitutions performed if successful match, -1 if error allocating + memory. */ +cre2_decl int cre2_global_replace (const char * pattern, + cre2_string_t * text_and_target, + cre2_string_t * rewrite); +cre2_decl int cre2_global_replace_re (cre2_regexp_t * rex, + cre2_string_t * text_and_target, + cre2_string_t * rewrite); + +/* Match the text in the buffer "text" against the rex in "pattern" or + "rex".
Mutate "target" so that it references a malloc'ed buffer + holding a copy of the text in "rewrite"; numeric backslash sequences + (\1 to \9) in "rewrite" are substituted with the portions of text + matching the corresponding parenthetical subexpressions. + + Non-matching text in "text" is ignored. + + Return 0 if no match, 1 if successful match, -1 if error allocating + memory. */ +cre2_decl int cre2_extract (const char * pattern, + cre2_string_t * text, + cre2_string_t * rewrite, + cre2_string_t * target); + +cre2_decl int cre2_extract_re (cre2_regexp_t * rex, + cre2_string_t * text, + cre2_string_t * rewrite, + cre2_string_t * target); + +/* ------------------------------------------------------------------ */ + +/* Allocate a zero-terminated malloc'ed buffer and fill it with the text + from "original" having all the regexp meta characters quoted with + single backslashes. Return 0 if successful, return -1 if an error + allocating memory occurs. */ +cre2_decl int cre2_quote_meta (cre2_string_t * quoted, cre2_string_t * original); + +/* Compute a "minimum" string and a "maximum" string matching the given + regular expression. The min and max can in some cases be arbitrarily + precise, so the caller gets to specify "maxlen" being the maximum + desired length of string returned. + + Assuming the call returns successfully, any string S that is an + anchored match for this regexp satisfies: + + min <= S && S <= max. + + Note that this function will only consider the first copy of an + infinitely repeated element (i.e., any regexp element followed by a + '*' or '+' operator). Regexps with "{N}" constructions are not + affected, as those do not compile down to infinite repetitions. + + "min_" and "max_" are mutated to reference zero-terminated malloc'ed + buffers holding the min and max strings. + + Return 0 if failure, return 1 if successful, return -1 if an error + allocating memory occurs.
*/ +cre2_decl int cre2_possible_match_range (cre2_regexp_t * rex, + cre2_string_t * min_, cre2_string_t * max_, + int maxlen); + +/* Check that the given rewrite string is suitable for use with this + regular expression. It checks that: + + * The regular expression has enough parenthesized subexpressions to + satisfy all of the \N tokens in rewrite + + * The rewrite string doesn't have any syntax errors. E.g., '\' + followed by anything other than a digit or '\'. + + A true return value guarantees that the replace and extract functions + won't fail because of a bad rewrite string. + + In case of error: "errmsg" is mutated to reference a zero-terminated + malloc'ed string describing the problem. + + Return 1 if the string is correct, return 0 if the string is + incorrect, return -1 if an error occurred allocating memory. */ +cre2_decl int cre2_check_rewrite_string (cre2_regexp_t * rex, + cre2_string_t * rewrite, cre2_string_t * errmsg); + + +/** -------------------------------------------------------------------- + ** Done. + ** ----------------------------------------------------------------- */ + +#ifdef __cplusplus +} // extern "C" +#endif + +/* end of file */ diff --git a/outside/cre2/src/tests/test-consume-match.c b/outside/cre2/src/tests/test-consume-match.c new file mode 100644 index 0000000000..6d21ce4b3d --- /dev/null +++ b/outside/cre2/src/tests/test-consume-match.c @@ -0,0 +1,335 @@ +/* + Part of: CRE2 + Contents: test for consume match function + Date: Tue Jan 3, 2012 + + Abstract + + Test file for consume match function. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. +*/ + +#include +#include +#include +#include + +#if 0 +# define PRINTF printf +# define FWRITE fwrite +#else +# define PRINTF(MSG, ...) /* empty string */ +# define FWRITE(BUF, ...) 
/* empty string */ +#endif + +int +main (int argc, const char *const argv[]) +{ + { /* success, no parentheses, full consumed buffer */ + const char * pattern = "ci.*ut"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_consume(pattern, &input, NULL, 0); + if (! result) + goto error; + if (0 != strncmp("", input.data, input.length)) + goto error; + } + { /* success, no parentheses, partially consumed buffer */ + const char * pattern = "ci.*ut"; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_consume(pattern, &input, NULL, 0); + if (! result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, one match entry */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_consume(pattern, &input, match, nmatch); + if (! result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* success, two parenthetical subexpressions, two match entries */ + const char * pattern = "(ciao) (salut)"; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + result = cre2_consume(pattern, &input, match, nmatch); + if (! 
result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp("salut", match[1].data, match[1].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + PRINTF("match 1: "); + FWRITE(match[1].data, match[1].length, 1, stdout); + PRINTF("\n"); + } + { /* failure, no parentheses */ + const char * pattern = "ci.*ut"; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_consume(pattern, &input, NULL, 0); + if (result) + goto error; + } + { /* failure, one parenthetical subexpression */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_consume(pattern, &input, match, nmatch); + if (result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, no match entries */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_consume(pattern, &input, NULL, 0); + if (! 
result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + } + { /* failure, one parenthetical subexpression, two match entries */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + memset(match, '\0', nmatch * sizeof(cre2_string_t)); + result = cre2_consume(pattern, &input, match, nmatch); + if (0 != result) + goto error; + } + { /* success, two parenthetical subexpressions, one match entry */ + const char * pattern = "(ciao) (salut)"; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_consume(pattern, &input, match, nmatch); + if (! result) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* wrong regexp specification */ + const char * pattern = "cia(o salut"; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_consume(pattern, &input, match, nmatch); + if (0 != result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + + +/* ------------------------------------------------------------------ */ + + { /* success, no parentheses, full buffer consumed */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_consume_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, no parentheses, partial buffer consumed */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_consume_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, one match entry */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* success, two parenthetical subexpressions, two match entries */ + const char * pattern = "(ciao) (salut)"; + cre2_regexp_t * rex; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp("salut", match[1].data, match[1].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + PRINTF("match 1: "); + FWRITE(match[1].data, match[1].length, 1, stdout); + PRINTF("\n"); + } + { /* failure, no parentheses */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_consume_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (result) + goto error; + } + { /* failure, one parenthetical subexpression */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, no match entries */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_consume_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + } + { /* failure, one parenthetical subexpression, two match entries */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + memset(match, '\0', nmatch * sizeof(cre2_string_t)); + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (0 != result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, two parenthetical subexpressions, one match entry */ + const char * pattern = "(ciao) (salut)"; + cre2_regexp_t * rex; + const char * text = "ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + + exit(EXIT_SUCCESS); + error: + exit(EXIT_FAILURE); +} + +/* end of file */ diff --git a/outside/cre2/src/tests/test-easy-matching.c b/outside/cre2/src/tests/test-easy-matching.c new file mode 100644 index 0000000000..34fe1ef111 --- /dev/null +++ b/outside/cre2/src/tests/test-easy-matching.c @@ -0,0 +1,103 @@ +/* + Part of: CRE2 + Contents: test for easy matching + Date: Mon Jan 2, 2012 + + Abstract + + Test file for regular expressions matching. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. 
+*/ + +#include +#include +#include +#include + +#if 0 +# define PRINTF printf +# define FWRITE fwrite +#else +# define PRINTF(MSG, ...) /* empty string */ +# define FWRITE(BUF, ...) /* empty string */ +#endif + +int +main (int argc, const char *const argv[]) +{ + const char * pattern; + const char * text; + +/* ------------------------------------------------------------------ */ +/* single match */ + + pattern = "ciao"; + text = "ciao"; + { + cre2_string_t match; + int nmatch = 1; + cre2_easy_match(pattern, strlen(pattern), + text, strlen(text), + &match, nmatch); + PRINTF("match: "); + FWRITE(match.data, match.length, 1, stdout); + PRINTF("\n"); + if (0 != strncmp("ciao", match.data, match.length)) + goto error; + } + +/* ------------------------------------------------------------------ */ +/* wrong pattern */ + + pattern = "ci(ao"; + text = "ciao"; + { + cre2_string_t match; + int nmatch = 1; + int retval; + retval = cre2_easy_match(pattern, strlen(pattern), + text, strlen(text), + &match, nmatch); + if (2 != retval) + goto error; + } + +/* ------------------------------------------------------------------ */ +/* two groups */ + + pattern = "(ciao) (hello)"; + text = "ciao hello"; + { + int nmatch = 3; + cre2_string_t match[nmatch]; + cre2_easy_match(pattern, strlen(pattern), + text, strlen(text), + match, nmatch); + PRINTF("full match: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + PRINTF("first group: "); + FWRITE(match[1].data, match[1].length, 1, stdout); + PRINTF("\n"); + PRINTF("second group: "); + FWRITE(match[2].data, match[2].length, 1, stdout); + PRINTF("\n"); + if (0 != strncmp("ciao hello", match[0].data, match[0].length)) + goto error; + if (0 != strncmp("ciao", match[1].data, match[1].length)) + goto error; + if (0 != strncmp("hello", match[2].data, match[2].length)) + goto error; + } + +/* ------------------------------------------------------------------ */ + + exit(EXIT_SUCCESS); + error: + exit(EXIT_FAILURE); +} + 
+/* end of file */ diff --git a/outside/cre2/src/tests/test-find-and-consume-match.c b/outside/cre2/src/tests/test-find-and-consume-match.c new file mode 100644 index 0000000000..525bceb536 --- /dev/null +++ b/outside/cre2/src/tests/test-find-and-consume-match.c @@ -0,0 +1,335 @@ +/* + Part of: CRE2 + Contents: test for find and consume match function + Date: Tue Jan 3, 2012 + + Abstract + + Test file for find and consume match function. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <cre2.h> + +#if 0 +# define PRINTF printf +# define FWRITE fwrite +#else +# define PRINTF(MSG, ...) /* empty string */ +# define FWRITE(BUF, ...) /* empty string */ +#endif + +int +main (int argc, const char *const argv[]) +{ + { /* success, no parentheses, full consumed buffer */ + const char * pattern = "ci.*ut"; + const char * text = "prefix ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_find_and_consume(pattern, &input, NULL, 0); + if (! result) + goto error; + if (0 != strncmp("", input.data, input.length)) + goto error; + } + { /* success, no parentheses, partially consumed buffer */ + const char * pattern = "ci.*ut"; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_find_and_consume(pattern, &input, NULL, 0); + if (! result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, one match entry */ + const char * pattern = "(ciao) salut"; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_find_and_consume(pattern, &input, match, nmatch); + if (! 
result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* success, two parenthetical subexpressions, two match entries */ + const char * pattern = "(ciao) (salut)"; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + result = cre2_find_and_consume(pattern, &input, match, nmatch); + if (! result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp("salut", match[1].data, match[1].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + PRINTF("match 1: "); + FWRITE(match[1].data, match[1].length, 1, stdout); + PRINTF("\n"); + } + { /* failure, no parentheses */ + const char * pattern = "ci.*ut"; + const char * text = "prefix ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_find_and_consume(pattern, &input, NULL, 0); + if (result) + goto error; + } + { /* failure, one parenthetical subexpression */ + const char * pattern = "(ciao) salut"; + const char * text = "prefix ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_find_and_consume(pattern, &input, match, nmatch); + if (result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, no match entries */ + const char * pattern = "(ciao) salut"; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int 
result; + result = cre2_find_and_consume(pattern, &input, NULL, 0); + if (! result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + } + { /* failure, one parenthetical subexpression, two match entries */ + const char * pattern = "(ciao) salut"; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + memset(match, '\0', nmatch * sizeof(cre2_string_t)); + result = cre2_find_and_consume(pattern, &input, match, nmatch); + if (0 != result) + goto error; + } + { /* success, two parenthetical subexpressions, one match entry */ + const char * pattern = "(ciao) (salut)"; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_find_and_consume(pattern, &input, match, nmatch); + if (! result) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* wrong regexp specification */ + const char * pattern = "cia(o salut"; + const char * text = "prefix ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_find_and_consume(pattern, &input, match, nmatch); + if (0 != result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + + +/* ------------------------------------------------------------------ */ + + { /* success, no parentheses, full buffer consumed */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "prefix ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = 
cre2_new(pattern, strlen(pattern), NULL); + result = cre2_find_and_consume_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, no parentheses, partial buffer consumed */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_find_and_consume_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, one match entry */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_find_and_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* success, two parenthetical subexpressions, two match entries */ + const char * pattern = "(ciao) (salut)"; + cre2_regexp_t * rex; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_find_and_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp("salut", match[1].data, match[1].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + PRINTF("match 1: "); + FWRITE(match[1].data, match[1].length, 1, stdout); + PRINTF("\n"); + } + { /* failure, no parentheses */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "prefix ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_find_and_consume_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (result) + goto error; + } + { /* failure, one parenthetical subexpression */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "prefix ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_find_and_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, no match entries */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_find_and_consume_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + } + { /* failure, one parenthetical subexpression, two match entries */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "prefix ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + memset(match, '\0', nmatch * sizeof(cre2_string_t)); + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_find_and_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (0 != result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, two parenthetical subexpressions, one match entry */ + const char * pattern = "(ciao) (salut)"; + cre2_regexp_t * rex; + const char * text = "prefix ciao salut hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_find_and_consume_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp(" hello", input.data, input.length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + + exit(EXIT_SUCCESS); + error: + exit(EXIT_FAILURE); +} + +/* end of file */ diff --git a/outside/cre2/src/tests/test-full-match.c b/outside/cre2/src/tests/test-full-match.c new file mode 100644 index 0000000000..5f3eae10ba --- /dev/null +++ b/outside/cre2/src/tests/test-full-match.c @@ -0,0 +1,308 @@ +/* + Part of: CRE2 + Contents: test for full match function + Date: Tue Jan 3, 2012 + + Abstract + + Test file for full match function. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. 
+*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <cre2.h> + +#if 0 +# define PRINTF printf +# define FWRITE fwrite +#else +# define PRINTF(MSG, ...) /* empty string */ +# define FWRITE(BUF, ...) /* empty string */ +#endif + +int +main (int argc, const char *const argv[]) +{ + { /* success, no parentheses */ + const char * pattern = "ci.*ut"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_full_match(pattern, &input, NULL, 0); + if (! result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, one match entry */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_full_match(pattern, &input, match, nmatch); + if (! result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* success, two parenthetical subexpressions, two match entries */ + const char * pattern = "(ciao) (salut)"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + result = cre2_full_match(pattern, &input, match, nmatch); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp("salut", match[1].data, match[1].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + PRINTF("match 1: "); + FWRITE(match[1].data, match[1].length, 1, stdout); + PRINTF("\n"); + } + { /* failure, no parentheses */ + const char * pattern = "ci.*ut"; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_full_match(pattern, &input, NULL, 0); + if (result) + goto error; + } + { /* failure, one parenthetical subexpression */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_full_match(pattern, &input, match, nmatch); + if (result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, no match entries */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_full_match(pattern, &input, NULL, 0); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* failure, one parenthetical subexpression, two match entries */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + memset(match, '\0', nmatch * sizeof(cre2_string_t)); + result = cre2_full_match(pattern, &input, match, nmatch); + if (0 != result) + goto error; + } + { /* success, two parenthetical subexpressions, one match entry */ + const char * pattern = "(ciao) (salut)"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_full_match(pattern, &input, match, nmatch); + if (! result) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* wrong regexp specification */ + const char * pattern = "cia(o salut"; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_full_match(pattern, &input, match, nmatch); + if (0 != result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + + +/* ------------------------------------------------------------------ */ + + { /* success, no parentheses */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_full_match_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, one match entry */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_full_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* success, two parenthetical subexpressions, two match entries */ + const char * pattern = "(ciao) (salut)"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_full_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp("salut", match[1].data, match[1].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + PRINTF("match 1: "); + FWRITE(match[1].data, match[1].length, 1, stdout); + PRINTF("\n"); + } + { /* failure, no parentheses */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_full_match_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (result) + goto error; + } + { /* failure, one parenthetical subexpression */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_full_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, no match entries */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_full_match_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* failure, one parenthetical subexpression, two match entries */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + memset(match, '\0', nmatch * sizeof(cre2_string_t)); + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_full_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (0 != result) + goto error; + } + { /* success, two parenthetical subexpressions, one match entry */ + const char * pattern = "(ciao) (salut)"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_full_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + + exit(EXIT_SUCCESS); + error: + exit(EXIT_FAILURE); +} + +/* end of file */ diff --git a/outside/cre2/src/tests/test-matching.c b/outside/cre2/src/tests/test-matching.c new file mode 100644 index 0000000000..17a3787d10 --- /dev/null +++ b/outside/cre2/src/tests/test-matching.c @@ -0,0 +1,122 @@ +/* + Part of: CRE2 + Contents: test for matching + Date: Mon Jan 2, 2012 + + Abstract + + Test file for regular expressions matching. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <cre2.h> + +#if 0 +# define PRINTF printf +# define FWRITE fwrite +#else +# define PRINTF(MSG, ...) /* empty string */ +# define FWRITE(BUF, ...) 
/* empty string */ +#endif + +int +main (int argc, const char *const argv[]) +{ /* Runs three cre2_match() scenarios (single match, two capture groups, literal option); any failed check jumps to the error label, which exits with EXIT_FAILURE. */ + cre2_regexp_t * rex; + cre2_options_t * opt; + const char * pattern; + +/* ------------------------------------------------------------------ */ +/* single match */ + + pattern = "ciao"; + opt = cre2_opt_new(); + cre2_opt_set_posix_syntax(opt, 1); + rex = cre2_new(pattern, strlen(pattern), opt); + { + if (cre2_error_code(rex)) + goto error; + cre2_string_t match; + int nmatch = 1; + int e; + const char * text = "ciao"; + int text_len = strlen(text); + + e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, &match, nmatch); + if (1 != e) + goto error; + PRINTF("match: retval=%d, ", e); + FWRITE(match.data, match.length, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + cre2_opt_delete(opt); + +/* ------------------------------------------------------------------ */ +/* two groups */ + + pattern = "(ciao) (hello)"; + opt = cre2_opt_new(); + rex = cre2_new(pattern, strlen(pattern), opt); + { + if (cre2_error_code(rex)) + goto error; + int nmatch = 3; /* three slots: printed below as full match, first group, second group */ + cre2_string_t strings[nmatch]; + cre2_range_t ranges[nmatch]; + int e; + const char * text = "ciao hello"; + int text_len = strlen(text); + + e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, strings, nmatch); + if (1 != e) + goto error; + cre2_strings_to_ranges(text, ranges, strings, nmatch); + PRINTF("full match: "); + FWRITE(text+ranges[0].start, ranges[0].past-ranges[0].start, 1, stdout); + PRINTF("\n"); + PRINTF("first group: "); + FWRITE(text+ranges[1].start, ranges[1].past-ranges[1].start, 1, stdout); + PRINTF("\n"); + PRINTF("second group: "); + FWRITE(text+ranges[2].start, ranges[2].past-ranges[2].start, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + cre2_opt_delete(opt); + +/* ------------------------------------------------------------------ */ +/* test literal option */ + + pattern = "(ciao) (hello)"; + opt = cre2_opt_new(); + cre2_opt_set_literal(opt, 1); + rex = cre2_new(pattern, 
strlen(pattern), opt); + { + if (cre2_error_code(rex)) + goto error; + int nmatch = 0; /* no match slots requested: NULL is passed as the match array */ + int e; + const char * text = "(ciao) (hello)"; + int text_len = strlen(text); + e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, NULL, nmatch); + if (0 == e) /* the text is the pattern string itself, so with the literal option set a non-match is a failure */ + goto error; + } + cre2_delete(rex); + cre2_opt_delete(opt); + +/* ------------------------------------------------------------------ */ + + exit(EXIT_SUCCESS); + error: + exit(EXIT_FAILURE); +} + +/* end of file */ diff --git a/outside/cre2/src/tests/test-misc.c b/outside/cre2/src/tests/test-misc.c new file mode 100644 index 0000000000..d2351612b0 --- /dev/null +++ b/outside/cre2/src/tests/test-misc.c @@ -0,0 +1,119 @@ +/* + Part of: CRE2 + Contents: test for miscellaneous functions + Date: Wed Jan 4, 2012 + + Abstract + + Test file for miscellaneous functions. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. +*/ + +#include +#include +#include +#include + +#if 0 +# define PRINTF printf +# define FWRITE fwrite +#else +# define PRINTF(MSG, ...) /* empty string */ +# define FWRITE(BUF, ...) 
/* empty string */ +#endif + +int +main (int argc, const char *const argv[]) +{ + { /* quote meta characters */ + const char * pattern = "1.5-2.0?"; + cre2_string_t original = { + .data = pattern, + .length = strlen(pattern) + }; + cre2_string_t quoted; + int result; + result = cre2_quote_meta("ed, &original); + if (0 != result) + goto error; + if (0 != strncmp("1\\.5\\-2\\.0\\?", quoted.data, quoted.length)) + goto error; + free((void *)quoted.data); + } + + /* ------------------------------------------------------------------ */ + + { /* minimum and maximum matching strings */ + const char * pattern = "(?i)ABCdef"; + cre2_regexp_t * rex; + cre2_string_t min, max; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_possible_match_range(rex, &min, &max, 1024); + if (1 != result) + goto error; + if (0 != strncmp("ABCDEF", min.data, min.length)) + goto error; + if (0 != strncmp("abcdef", max.data, max.length)) + goto error; + } + cre2_delete(rex); + free((void *)min.data); + free((void *)max.data); + } + + /* ------------------------------------------------------------------ */ + + { /* successfully check rewrite string */ + const char * pattern = "a(b)c"; + const char * subst = "def"; + cre2_string_t rewrite = { + .data = subst, + .length = strlen(subst) + }; + cre2_regexp_t * rex; + cre2_string_t errmsg; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_check_rewrite_string(rex, &rewrite, &errmsg); + if (1 != result) + goto error; + } + cre2_delete(rex); + } + { /* failed check rewrite string */ + const char * pattern = "a(b)c"; + const char * subst = "\\1 \\2"; + cre2_string_t rewrite = { + .data = subst, + .length = strlen(subst) + }; + cre2_regexp_t * rex; + cre2_string_t errmsg; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_check_rewrite_string(rex, &rewrite, &errmsg); + if (0 != result) + goto error; + PRINTF("error message: "); + FWRITE(errmsg.data, 
errmsg.length, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + free((void *)errmsg.data); + } + +/* ------------------------------------------------------------------ */ + + exit(EXIT_SUCCESS); + error: + exit(EXIT_FAILURE); +} + +/* end of file */ diff --git a/outside/cre2/src/tests/test-options.c b/outside/cre2/src/tests/test-options.c new file mode 100644 index 0000000000..c6a47d39db --- /dev/null +++ b/outside/cre2/src/tests/test-options.c @@ -0,0 +1,43 @@ +/* + Part of: CRE2 + Contents: test for options + Date: Mon Jan 2, 2012 + + Abstract + + Test file for options objects. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. +*/ + +#include +#include +#include + +int +main (int argc, const char *const argv[]) +{ + cre2_options_t * opt; + + opt = cre2_opt_new(); + { + cre2_opt_set_posix_syntax(opt, 1); + cre2_opt_set_longest_match(opt, 1); + cre2_opt_set_log_errors(opt, 1); + cre2_opt_set_literal(opt, 1); + cre2_opt_set_never_nl(opt, 1); + cre2_opt_set_case_sensitive(opt, 1); + cre2_opt_set_perl_classes(opt, 1); + cre2_opt_set_word_boundary(opt, 1); + cre2_opt_set_one_line(opt, 1); + cre2_opt_set_encoding(opt, CRE2_UTF8); + cre2_opt_set_encoding(opt, CRE2_Latin1); + cre2_opt_set_max_mem(opt, 4096); + } + cre2_opt_delete(opt); + exit(EXIT_SUCCESS); +} + +/* end of file */ diff --git a/outside/cre2/src/tests/test-partial-match.c b/outside/cre2/src/tests/test-partial-match.c new file mode 100644 index 0000000000..5825789c20 --- /dev/null +++ b/outside/cre2/src/tests/test-partial-match.c @@ -0,0 +1,308 @@ +/* + Part of: CRE2 + Contents: test for partial match function + Date: Tue Jan 3, 2012 + + Abstract + + Test file for partial match function. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. +*/ + +#include +#include +#include +#include + +#if 0 +# define PRINTF printf +# define FWRITE fwrite +#else +# define PRINTF(MSG, ...) /* empty string */ +# define FWRITE(BUF, ...) 
/* empty string */ +#endif + +int +main (int argc, const char *const argv[]) +{ /* First part calls cre2_partial_match() with a pattern string; after the separator, similar cases go through cre2_new() and cre2_partial_match_re(). Any failed check jumps to the error label, which exits with EXIT_FAILURE. */ + { /* success, no parentheses */ + const char * pattern = "ci.*ut"; + const char * text = "pre ciao salut post"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_partial_match(pattern, &input, NULL, 0); + if (! result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, one match entry */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_partial_match(pattern, &input, match, nmatch); + if (! result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* success, two parenthetical subexpressions, two match entries */ + const char * pattern = "(ciao) (salut)"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + result = cre2_partial_match(pattern, &input, match, nmatch); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp("salut", match[1].data, match[1].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + PRINTF("match 1: "); + FWRITE(match[1].data, match[1].length, 1, stdout); + PRINTF("\n"); + } + { /* failure, no parentheses */ + const char * pattern = "ci.*ut"; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_partial_match(pattern, &input, NULL, 0); + if (result) + goto error; + } + { /* failure, one parenthetical subexpression */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_partial_match(pattern, &input, match, nmatch); + if (result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, no match entries */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + result = cre2_partial_match(pattern, &input, NULL, 0); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* failure, one parenthetical subexpression, two match entries */ + const char * pattern = "(ciao) salut"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + memset(match, '\0', nmatch * sizeof(cre2_string_t)); + result = cre2_partial_match(pattern, &input, match, nmatch); + if (0 != result) + goto error; + } + { /* success, two parenthetical subexpressions, one match entry */ + const char * pattern = "(ciao) (salut)"; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_partial_match(pattern, &input, match, nmatch); + if (! result) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* wrong regexp specification */ + const char * pattern = "cia(o salut"; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + result = cre2_partial_match(pattern, &input, match, nmatch); + if (0 != result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + + +/* ------------------------------------------------------------------ */ /* from here on each case builds a regexp object with cre2_new(), matches with cre2_partial_match_re() and deletes the object before checking the result */ + + { /* success, no parentheses */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_partial_match_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, one match entry */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_partial_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + { /* success, two parenthetical subexpressions, two match entries */ + const char * pattern = "(ciao) (salut)"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_partial_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp("salut", match[1].data, match[1].length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + PRINTF("match 1: "); + FWRITE(match[1].data, match[1].length, 1, stdout); + PRINTF("\n"); + } + { /* failure, no parentheses */ + const char * pattern = "ci.*ut"; + cre2_regexp_t * rex; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_partial_match_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (result) + goto error; + } + { /* failure, one parenthetical subexpression */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao hello"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_partial_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* success, one parenthetical subexpression, no match entries */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_partial_match_re(rex, &input, NULL, 0); + cre2_delete(rex); + if (! 
result) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + } + { /* failure, one parenthetical subexpression, two match entries */ + const char * pattern = "(ciao) salut"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 2; + cre2_string_t match[nmatch]; + int result; + memset(match, '\0', nmatch * sizeof(cre2_string_t)); + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_partial_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (0 != result) + goto error; + } + { /* success, two parenthetical subexpressions, one match entry */ + const char * pattern = "(ciao) (salut)"; + cre2_regexp_t * rex; + const char * text = "ciao salut"; + cre2_string_t input = { .data = text, .length = strlen(text) }; + int nmatch = 1; + cre2_string_t match[nmatch]; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + result = cre2_partial_match_re(rex, &input, match, nmatch); + cre2_delete(rex); + if (! result) + goto error; + if (0 != strncmp("ciao", match[0].data, match[0].length)) + goto error; + if (0 != strncmp(text, input.data, input.length)) + goto error; + PRINTF("match 0: "); + FWRITE(match[0].data, match[0].length, 1, stdout); + PRINTF("\n"); + } + + exit(EXIT_SUCCESS); + error: + exit(EXIT_FAILURE); +} + +/* end of file */ diff --git a/outside/cre2/src/tests/test-replace.c b/outside/cre2/src/tests/test-replace.c new file mode 100644 index 0000000000..e1098cf951 --- /dev/null +++ b/outside/cre2/src/tests/test-replace.c @@ -0,0 +1,257 @@ +/* + Part of: CRE2 + Contents: test for replace + Date: Wed Jan 4, 2012 + + Abstract + + Test file for replacing. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. +*/ + +#include +#include +#include +#include + +#if 0 +# define PRINTF printf +# define FWRITE fwrite +#else +# define PRINTF(MSG, ...) /* empty string */ +# define FWRITE(BUF, ...) 
/* empty string */ +#endif + +int +main (int argc, const char *const argv[]) +{ /* Exercises cre2_replace_re(), cre2_global_replace_re() and cre2_extract_re(); each case compares the result buffer with the expected text, checks that the byte at target.length is NUL, and frees target.data. Any failed check jumps to the error label, which exits with EXIT_FAILURE. */ + { /* replace all the buffer using the full match */ + cre2_regexp_t * rex; + const char * pattern = "ciao hello salut"; + const char * text = "ciao hello salut"; + const char * replace = "pre \\0 post"; + cre2_string_t target = { + .data = text, + .length = strlen(text) + }; + cre2_string_t rewrite = { + .data = replace, + .length = strlen(replace) + }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_replace_re(rex, &target, &rewrite); + if (1 != result) + goto error; + if (0 != strncmp("pre ciao hello salut post", target.data, target.length)) + goto error; + if ('\0' != target.data[target.length]) + goto error; + PRINTF("rewritten to: "); + FWRITE(target.data, target.length, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + free((void *)target.data); + } + { /* replace substring with fixed string */ + cre2_regexp_t * rex; + const char * pattern = "hello"; + const char * text = "ciao hello salut"; + const char * replace = "ohayo"; + cre2_string_t target = { + .data = text, + .length = strlen(text) + }; + cre2_string_t rewrite = { + .data = replace, + .length = strlen(replace) + }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_replace_re(rex, &target, &rewrite); + if (1 != result) + goto error; + if (0 != strncmp("ciao ohayo salut", target.data, target.length)) + goto error; + if ('\0' != target.data[target.length]) + goto error; + PRINTF("rewritten to: "); + FWRITE(target.data, target.length, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + free((void *)target.data); + } + + /* ------------------------------------------------------------------ */ + + { /* global replace all the buffer using the full match */ + cre2_regexp_t * rex; + const char * pattern = "ciao hello salut"; + const char * text = "ciao hello salut"; + const char * replace = "pre \\0 post"; + cre2_string_t target = { + .data = text, + 
.length = strlen(text) + }; + cre2_string_t rewrite = { + .data = replace, + .length = strlen(replace) + }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_global_replace_re(rex, &target, &rewrite); + if (1 != result) + goto error; + if (0 != strncmp("pre ciao hello salut post", target.data, target.length)) + goto error; + if ('\0' != target.data[target.length]) + goto error; + PRINTF("rewritten to: "); + FWRITE(target.data, target.length, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + free((void *)target.data); + } + { /* global replace substring with fixed string */ + cre2_regexp_t * rex; + const char * pattern = "hello"; + const char * text = "ciao hello salut"; + const char * replace = "ohayo"; + cre2_string_t target = { + .data = text, + .length = strlen(text) + }; + cre2_string_t rewrite = { + .data = replace, + .length = strlen(replace) + }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_global_replace_re(rex, &target, &rewrite); + if (1 != result) + goto error; + if (0 != strncmp("ciao ohayo salut", target.data, target.length)) + goto error; + if ('\0' != target.data[target.length]) + goto error; + PRINTF("rewritten to: "); + FWRITE(target.data, target.length, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + free((void *)target.data); + } + { /* global replace multiple substrings with parametrised string */ + cre2_regexp_t * rex; + const char * pattern = "[a-z]+\\(([0-9]+)\\)"; + const char * text = "ciao(1) hello(2) salut(3)"; + const char * replace = "ohayo(\\1)"; + cre2_string_t target = { + .data = text, + .length = strlen(text) + }; + cre2_string_t rewrite = { + .data = replace, + .length = strlen(replace) + }; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_global_replace_re(rex, &target, &rewrite); + if (3 != result) /* 3 substitutions */ + goto error; + if (0 != strncmp("ohayo(1) ohayo(2) ohayo(3)", target.data, target.length)) + 
goto error; + if ('\0' != target.data[target.length]) + goto error; + PRINTF("result %d, rewritten to: ", result); + FWRITE(target.data, target.length, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + free((void *)target.data); + } + +/* ------------------------------------------------------------------ */ + + { /* extract all the buffer using the full match */ + cre2_regexp_t * rex; + const char * pattern = "ciao hello salut"; + const char * text = "ciao hello salut"; + const char * replace = "pre \\0 post"; + cre2_string_t input = { + .data = text, + .length = strlen(text) + }; + cre2_string_t rewrite = { + .data = replace, + .length = strlen(replace) + }; + cre2_string_t target; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_extract_re(rex, &input, &rewrite, &target); + if (1 != result) + goto error; + if (0 != strncmp("pre ciao hello salut post", target.data, target.length)) + goto error; + if ('\0' != target.data[target.length]) + goto error; + PRINTF("rewritten to: "); + FWRITE(target.data, target.length, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + free((void *)target.data); + } + { /* extract substring with fixed string */ + cre2_regexp_t * rex; + const char * pattern = "hello([0-9]+)"; + const char * text = "ciao hello123 salut"; + const char * replace = "ohayo\\1"; + cre2_string_t input = { + .data = text, + .length = strlen(text) + }; + cre2_string_t rewrite = { + .data = replace, + .length = strlen(replace) + }; + cre2_string_t target; + int result; + rex = cre2_new(pattern, strlen(pattern), NULL); + { + result = cre2_extract_re(rex, &input, &rewrite, &target); + if (1 != result) + goto error; + if (0 != strncmp("ohayo123", target.data, target.length)) + goto error; + if ('\0' != target.data[target.length]) + goto error; + PRINTF("rewritten to: "); + FWRITE(target.data, target.length, 1, stdout); + PRINTF("\n"); + } + cre2_delete(rex); + free((void *)target.data); + } + + /* 
------------------------------------------------------------------ */ + + exit(EXIT_SUCCESS); + error: + exit(EXIT_FAILURE); +} + +/* end of file */ diff --git a/outside/cre2/src/tests/test-rex-alloc.c b/outside/cre2/src/tests/test-rex-alloc.c new file mode 100644 index 0000000000..cde19f3769 --- /dev/null +++ b/outside/cre2/src/tests/test-rex-alloc.c @@ -0,0 +1,113 @@ +/* + Part of: CRE2 + Contents: test for rex allocation + Date: Mon Jan 2, 2012 + + Abstract + + Test file for regular expressions allocation. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. +*/ + +#include +#include +#include +#include + +#if 0 +# define PRINTF printf +# define FWRITE fwrite +#else +# define PRINTF(MSG, ...) /* empty string */ +# define FWRITE(BUF, ...) /* empty string */ +#endif + +int +main (int argc, const char *const argv[]) +{ /* Builds regexps with cre2_new() (with an options object, with NULL options, with one capture group, and with a malformed pattern) and checks the error code, and where relevant the capturing-group count, reported for each. Any failed check jumps to the error label, which exits with EXIT_FAILURE. */ + cre2_regexp_t * rex; + cre2_options_t * opt; + opt = cre2_opt_new(); + cre2_opt_set_posix_syntax(opt, 1); + rex = cre2_new("ciao", 4, opt); + { + cre2_string_t S; + PRINTF("pattern: %s\n", cre2_pattern(rex)); + PRINTF("error code: %d\n", cre2_error_code(rex)); + PRINTF("error string: \"%s\"\n", cre2_error_string(rex)); + PRINTF("number of capturing groups: %d\n", cre2_num_capturing_groups(rex)); + PRINTF("program size: %d\n", cre2_program_size(rex)); + cre2_error_arg(rex, &S); + PRINTF("error arg: len=%d, data=\"%s\"\n", S.length, S.data); + if (cre2_error_code(rex)) + goto error; + if (cre2_num_capturing_groups(rex)) + goto error; + if (cre2_error_code(rex)) + goto error; + if (0 != strlen(cre2_error_string(rex))) + goto error; + if (0 != S.length) + goto error; + } + cre2_delete(rex); + cre2_opt_delete(opt); + +/* ------------------------------------------------------------------ */ +/* no options object */ + + rex = cre2_new("ciao", 4, NULL); + { + if (cre2_error_code(rex)) + goto error; + } + cre2_delete(rex); + +/* ------------------------------------------------------------------ */ + + opt = cre2_opt_new(); + 
cre2_opt_set_posix_syntax(opt, 1); + rex = cre2_new("ci(ao)", 6, opt); + { + PRINTF("error code: %d\n", cre2_error_code(rex)); + PRINTF("number of capturing groups: %d\n", cre2_num_capturing_groups(rex)); + PRINTF("program size: %d\n", cre2_program_size(rex)); + if (cre2_error_code(rex)) + goto error; + if (1 != cre2_num_capturing_groups(rex)) + goto error; + } + cre2_delete(rex); + cre2_opt_delete(opt); + +/* ------------------------------------------------------------------ */ + + opt = cre2_opt_new(); + cre2_opt_set_log_errors(opt, 0); + rex = cre2_new("ci(ao", 5, opt); /* pattern lacks its closing parenthesis; the check below expects CRE2_ERROR_MISSING_PAREN */ + { + int code = cre2_error_code(rex); + const char * msg = cre2_error_string(rex); + cre2_string_t S; + cre2_error_arg(rex, &S); + if (CRE2_ERROR_MISSING_PAREN != code) + goto error; + if (! msg) + goto error; + PRINTF("pattern: %s\n", cre2_pattern(rex)); + PRINTF("error: code=%d, msg=\"%s\"\n", code, msg); + PRINTF("error arg: len=%d, data=\"%s\"\n", S.length, S.data); + } + cre2_delete(rex); + cre2_opt_delete(opt); + + exit(EXIT_SUCCESS); + + error: + exit(EXIT_FAILURE); +} + +/* end of file */ diff --git a/outside/cre2/src/tests/test-version.c b/outside/cre2/src/tests/test-version.c new file mode 100644 index 0000000000..aa1e5a67f1 --- /dev/null +++ b/outside/cre2/src/tests/test-version.c @@ -0,0 +1,30 @@ +/* + Part of: CRE2 + Contents: test for version functions + Date: Mon Jan 2, 2012 + + Abstract + + Test file for version functions. + + Copyright (C) 2012 Marco Maggi + + See the COPYING file. 
+*/ + +#include +#include +#include + +int +main (int argc, const char *const argv[]) +{ /* Prints the cre2 version string and the three libtool interface numbers (current, revision, age), then exits with EXIT_SUCCESS. */ + printf("version number string: %s\n", cre2_version_string()); + printf("libtool version number: %d:%d:%d\n", + cre2_version_interface_current(), + cre2_version_interface_revision(), + cre2_version_interface_age()); + exit(EXIT_SUCCESS); +} + +/* end of file */ diff --git a/v/unix.c b/v/unix.c index 9415462f34..ab274291e8 100644 --- a/v/unix.c +++ b/v/unix.c @@ -180,12 +180,14 @@ _unix_fs_event_cb(uv_fs_event_t* was_u, u2_unod* nod_u = (void*)was_u; // uL(fprintf(uH, "fs: %s in %s\n", pax_c, nod_u->pax_c)); + u2_lo_open(); { while ( nod_u ) { nod_u->dry = u2_no; nod_u = (u2_unod*) nod_u->par_u; } } + u2_lo_shut(u2_yes); } /* _unix_file_watch(): create file tracker (from filesystem)