From e199b8b737eff0cf6d1b1d1e57865cd0b44f00f0 Mon Sep 17 00:00:00 2001 From: Tae Won Ha Date: Fri, 17 Jan 2020 10:19:16 +0100 Subject: [PATCH] Add new deps --- VimR/VimR.xcodeproj/project.pbxproj | 66 ++ bin/build_deps.sh | 98 +- third-party/libag/include/config.h | 120 +++ third-party/libag/include/decompress.h | 26 + third-party/libag/include/ignore.h | 48 + third-party/libag/include/lang.h | 36 + third-party/libag/include/log.h | 32 + third-party/libag/include/options.h | 105 ++ third-party/libag/include/print.h | 26 + third-party/libag/include/scandir.h | 20 + third-party/libag/include/search.h | 78 ++ third-party/libag/include/uthash.h | 971 ++++++++++++++++++ third-party/libag/include/util.h | 119 +++ third-party/libag/lib/libag.a | 3 + third-party/libpcre/include/pcre.h | 677 ++++++++++++ third-party/libpcre/include/pcre_scanner.h | 172 ++++ .../libpcre/include/pcre_stringpiece.h | 180 ++++ third-party/libpcre/include/pcrecpp.h | 710 +++++++++++++ third-party/libpcre/include/pcrecpparg.h | 174 ++++ third-party/libpcre/include/pcreposix.h | 146 +++ third-party/libpcre/lib/libpcre.1.dylib | 3 + third-party/libpcre/lib/libpcre.a | 3 + third-party/libpcre/lib/libpcre.dylib | 1 + third-party/libpcre/lib/libpcre.la | 41 + third-party/libpcre/lib/libpcre16.0.dylib | 3 + third-party/libpcre/lib/libpcre16.a | 3 + third-party/libpcre/lib/libpcre16.dylib | 1 + third-party/libpcre/lib/libpcre16.la | 41 + third-party/libpcre/lib/libpcre32.0.dylib | 3 + third-party/libpcre/lib/libpcre32.a | 3 + third-party/libpcre/lib/libpcre32.dylib | 1 + third-party/libpcre/lib/libpcre32.la | 41 + third-party/libpcre/lib/libpcrecpp.0.dylib | 3 + third-party/libpcre/lib/libpcrecpp.a | 3 + third-party/libpcre/lib/libpcrecpp.dylib | 1 + third-party/libpcre/lib/libpcrecpp.la | 41 + third-party/libpcre/lib/libpcreposix.0.dylib | 3 + third-party/libpcre/lib/libpcreposix.a | 3 + third-party/libpcre/lib/libpcreposix.dylib | 1 + third-party/libpcre/lib/libpcreposix.la | 41 + third-party/libpcre/lib/pkgconfig/libpcre.pc | 13 + .../libpcre/lib/pkgconfig/libpcre16.pc | 13 + .../libpcre/lib/pkgconfig/libpcre32.pc | 13 + .../libpcre/lib/pkgconfig/libpcrecpp.pc | 12 + .../libpcre/lib/pkgconfig/libpcreposix.pc | 13 + third-party/libxz/include/lzma.h | 325 ++++++ third-party/libxz/include/lzma/base.h | 659 ++++++++++++ third-party/libxz/include/lzma/bcj.h | 90 ++ third-party/libxz/include/lzma/block.h | 581 +++++++++++ third-party/libxz/include/lzma/check.h | 150 +++ third-party/libxz/include/lzma/container.h | 632 ++++++++++++ third-party/libxz/include/lzma/delta.h | 77 ++ third-party/libxz/include/lzma/filter.h | 425 ++++++++ third-party/libxz/include/lzma/hardware.h | 64 ++ third-party/libxz/include/lzma/index.h | 686 +++++++++++++ third-party/libxz/include/lzma/index_hash.h | 107 ++ third-party/libxz/include/lzma/lzma12.h | 420 ++++++++ third-party/libxz/include/lzma/stream_flags.h | 223 ++++ third-party/libxz/include/lzma/version.h | 121 +++ third-party/libxz/include/lzma/vli.h | 166 +++ third-party/libxz/lib/liblzma.5.dylib | 3 + third-party/libxz/lib/liblzma.a | 3 + third-party/libxz/lib/liblzma.dylib | 1 + third-party/libxz/lib/liblzma.la | 41 + third-party/libxz/lib/pkgconfig/liblzma.pc | 19 + 65 files changed, 8903 insertions(+), 1 deletion(-) create mode 100644 third-party/libag/include/config.h create mode 100644 third-party/libag/include/decompress.h create mode 100644 third-party/libag/include/ignore.h create mode 100644 third-party/libag/include/lang.h create mode 100644 third-party/libag/include/log.h create mode 100644 third-party/libag/include/options.h create mode 100644 third-party/libag/include/print.h create mode 100644 third-party/libag/include/scandir.h create mode 100644 third-party/libag/include/search.h create mode 100644 third-party/libag/include/uthash.h create mode 100644 third-party/libag/include/util.h create mode 100644 third-party/libag/lib/libag.a create mode 100644 third-party/libpcre/include/pcre.h create mode 100644 third-party/libpcre/include/pcre_scanner.h create mode 100644 third-party/libpcre/include/pcre_stringpiece.h create mode 100644 third-party/libpcre/include/pcrecpp.h create mode 100644 third-party/libpcre/include/pcrecpparg.h create mode 100644 third-party/libpcre/include/pcreposix.h create mode 100755 third-party/libpcre/lib/libpcre.1.dylib create mode 100644 third-party/libpcre/lib/libpcre.a create mode 120000 third-party/libpcre/lib/libpcre.dylib create mode 100755 third-party/libpcre/lib/libpcre.la create mode 100755 third-party/libpcre/lib/libpcre16.0.dylib create mode 100644 third-party/libpcre/lib/libpcre16.a create mode 120000 third-party/libpcre/lib/libpcre16.dylib create mode 100755 third-party/libpcre/lib/libpcre16.la create mode 100755 third-party/libpcre/lib/libpcre32.0.dylib create mode 100644 third-party/libpcre/lib/libpcre32.a create mode 120000 third-party/libpcre/lib/libpcre32.dylib create mode 100755 third-party/libpcre/lib/libpcre32.la create mode 100755 third-party/libpcre/lib/libpcrecpp.0.dylib create mode 100644 third-party/libpcre/lib/libpcrecpp.a create mode 120000 third-party/libpcre/lib/libpcrecpp.dylib create mode 100755 third-party/libpcre/lib/libpcrecpp.la create mode 100755 third-party/libpcre/lib/libpcreposix.0.dylib create mode 100644 third-party/libpcre/lib/libpcreposix.a create mode 120000 third-party/libpcre/lib/libpcreposix.dylib create mode 100755 third-party/libpcre/lib/libpcreposix.la create mode 100644 third-party/libpcre/lib/pkgconfig/libpcre.pc create mode 100644 third-party/libpcre/lib/pkgconfig/libpcre16.pc create mode 100644 third-party/libpcre/lib/pkgconfig/libpcre32.pc create mode 100644 third-party/libpcre/lib/pkgconfig/libpcrecpp.pc create mode 100644 third-party/libpcre/lib/pkgconfig/libpcreposix.pc create mode 100644 third-party/libxz/include/lzma.h create mode 100644 third-party/libxz/include/lzma/base.h create mode 100644 third-party/libxz/include/lzma/bcj.h create mode 100644 third-party/libxz/include/lzma/block.h create mode 100644 third-party/libxz/include/lzma/check.h create mode 100644 third-party/libxz/include/lzma/container.h create mode 100644 third-party/libxz/include/lzma/delta.h create mode 100644 third-party/libxz/include/lzma/filter.h create mode 100644 third-party/libxz/include/lzma/hardware.h create mode 100644 third-party/libxz/include/lzma/index.h create mode 100644 third-party/libxz/include/lzma/index_hash.h create mode 100644 third-party/libxz/include/lzma/lzma12.h create mode 100644 third-party/libxz/include/lzma/stream_flags.h create mode 100644 third-party/libxz/include/lzma/version.h create mode 100644 third-party/libxz/include/lzma/vli.h create mode 100755 third-party/libxz/lib/liblzma.5.dylib create mode 100644 third-party/libxz/lib/liblzma.a create mode 120000 third-party/libxz/lib/liblzma.dylib create mode 100755 third-party/libxz/lib/liblzma.la create mode 100644 third-party/libxz/lib/pkgconfig/liblzma.pc diff --git a/VimR/VimR.xcodeproj/project.pbxproj b/VimR/VimR.xcodeproj/project.pbxproj index 67a5a078..8daffa85 100644 --- a/VimR/VimR.xcodeproj/project.pbxproj +++ b/VimR/VimR.xcodeproj/project.pbxproj @@ -701,6 +701,17 @@ 4BF18C381FD2E2AB00DF95D1 /* RxCocoa.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = RxCocoa.framework; path = ../Carthage/Build/Mac/RxCocoa.framework; sourceTree = ""; }; 4BF18C491FD2E2DE00DF95D1 /* Nimble.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Nimble.framework; path = ../Carthage/Build/Mac/Nimble.framework; sourceTree = ""; }; 4BF18C501FD2E4F200DF95D1 /* RxTest.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = RxTest.framework; path = ../Carthage/Build/Mac/RxTest.framework; sourceTree = ""; }; + 4BF70EB423D1B335009E51E9 /* print.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = print.h; path = "../../third-party/libag/include/print.h"; sourceTree = ""; }; + 4BF70EB523D1B335009E51E9 /* util.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = util.h; path = "../../third-party/libag/include/util.h"; sourceTree = ""; }; + 4BF70EB623D1B335009E51E9 /* log.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = log.h; path = "../../third-party/libag/include/log.h"; sourceTree = ""; }; + 4BF70EB723D1B335009E51E9 /* decompress.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = decompress.h; path = "../../third-party/libag/include/decompress.h"; sourceTree = ""; }; + 4BF70EB823D1B335009E51E9 /* search.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = search.h; path = "../../third-party/libag/include/search.h"; sourceTree = ""; }; + 4BF70EB923D1B335009E51E9 /* options.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = options.h; path = "../../third-party/libag/include/options.h"; sourceTree = ""; }; + 4BF70EBA23D1B335009E51E9 /* config.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = config.h; path = "../../third-party/libag/include/config.h"; sourceTree = ""; }; + 4BF70EBB23D1B335009E51E9 /* uthash.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = uthash.h; path = "../../third-party/libag/include/uthash.h"; sourceTree = ""; }; + 4BF70EBC23D1B335009E51E9 /* ignore.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = ignore.h; path = "../../third-party/libag/include/ignore.h"; sourceTree = ""; }; + 4BF70EBD23D1B335009E51E9 /* lang.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = lang.h; path = "../../third-party/libag/include/lang.h"; sourceTree = ""; }; + 4BF70EBE23D1B335009E51E9 /* scandir.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scandir.h; path = "../../third-party/libag/include/scandir.h"; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -1125,6 +1136,7 @@ 4BEBA5071CFF374B00673FDF /* VimR */ = { isa = PBXGroup; children = ( + 4BF70EB323D1B30A009E51E9 /* the_silver_searcher */, 4BB4CCEE224A7E6D00474C79 /* FSEvents */, 4B004BCA21063B4D0043A396 /* DictionaryCoding */, 4B2A2C0D1D0353750074CE9A /* Bridge.h */, @@ -1155,6 +1167,24 @@ path = VimRTests; sourceTree = ""; }; + 4BF70EB323D1B30A009E51E9 /* the_silver_searcher */ = { + isa = PBXGroup; + children = ( + 4BF70EBA23D1B335009E51E9 /* config.h */, + 4BF70EB723D1B335009E51E9 /* decompress.h */, + 4BF70EBC23D1B335009E51E9 /* ignore.h */, + 4BF70EBD23D1B335009E51E9 /* lang.h */, + 4BF70EB623D1B335009E51E9 /* log.h */, + 4BF70EB923D1B335009E51E9 /* options.h */, + 4BF70EB423D1B335009E51E9 /* print.h */, + 4BF70EBE23D1B335009E51E9 /* scandir.h */, + 4BF70EB823D1B335009E51E9 /* search.h */, + 4BF70EBB23D1B335009E51E9 /* uthash.h */, + 4BF70EB523D1B335009E51E9 /* util.h */, + ); + name = the_silver_searcher; + sourceTree = ""; + }; 4BF8EED91D858C4400CAC08A /* Utils */ = { isa = PBXGroup; children = ( @@ -1783,8 +1813,17 @@ "$(inherited)", "$(PROJECT_DIR)/../Carthage/Build/Mac", ); + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../third-party/libxz/include", + "$(PROJECT_DIR)/../third-party/libpcre/include", + ); INFOPLIST_FILE = "$(SRCROOT)/VimR.dev.Info.plist"; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks"; + OTHER_LDFLAGS = ( + "$(PROJECT_DIR)/../third-party/libxz/lib/liblzma.a", + "$(PROJECT_DIR)/../third-party/libpcre/lib/libpcre.a", + "$(PROJECT_DIR)/../third-party/libag/lib/libag.a", + ); PRODUCT_BUNDLE_IDENTIFIER = com.qvacua.VimR.dev; PRODUCT_MODULE_NAME = VimR; PRODUCT_NAME = "VimR-dev"; @@ -1803,8 +1842,17 @@ "$(inherited)", "$(PROJECT_DIR)/../Carthage/Build/Mac", ); + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../third-party/libxz/include", + "$(PROJECT_DIR)/../third-party/libpcre/include", + ); INFOPLIST_FILE = "$(SRCROOT)/VimR.dev.Info.plist"; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks"; + OTHER_LDFLAGS = ( + "$(PROJECT_DIR)/../third-party/libxz/lib/liblzma.a", + "$(PROJECT_DIR)/../third-party/libpcre/lib/libpcre.a", + "$(PROJECT_DIR)/../third-party/libag/lib/libag.a", + ); PRODUCT_BUNDLE_IDENTIFIER = com.qvacua.VimR.dev; PRODUCT_MODULE_NAME = VimR; PRODUCT_NAME = "VimR-dev"; @@ -1937,8 +1985,17 @@ "$(inherited)", "$(PROJECT_DIR)/../Carthage/Build/Mac", ); + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../third-party/libxz/include", + "$(PROJECT_DIR)/../third-party/libpcre/include", + ); INFOPLIST_FILE = VimR/Info.plist; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks"; + OTHER_LDFLAGS = ( + "$(PROJECT_DIR)/../third-party/libxz/lib/liblzma.a", + "$(PROJECT_DIR)/../third-party/libpcre/lib/libpcre.a", + "$(PROJECT_DIR)/../third-party/libag/lib/libag.a", + ); PRODUCT_BUNDLE_IDENTIFIER = com.qvacua.VimR; PRODUCT_NAME = VimR; SWIFT_OBJC_BRIDGING_HEADER = VimR/Bridge.h; @@ -1955,8 +2012,17 @@ "$(inherited)", "$(PROJECT_DIR)/../Carthage/Build/Mac", ); + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../third-party/libxz/include", + "$(PROJECT_DIR)/../third-party/libpcre/include", + ); INFOPLIST_FILE = VimR/Info.plist; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks"; + OTHER_LDFLAGS = ( + "$(PROJECT_DIR)/../third-party/libxz/lib/liblzma.a", + "$(PROJECT_DIR)/../third-party/libpcre/lib/libpcre.a", + "$(PROJECT_DIR)/../third-party/libag/lib/libag.a", + ); PRODUCT_BUNDLE_IDENTIFIER = com.qvacua.VimR; PRODUCT_NAME = VimR; SWIFT_OBJC_BRIDGING_HEADER = VimR/Bridge.h; diff --git a/bin/build_deps.sh b/bin/build_deps.sh index 068366d5..6f68e7b5 100755 --- a/bin/build_deps.sh +++ b/bin/build_deps.sh @@ -7,12 +7,104 @@ pushd "$(dirname "${BASH_SOURCE[0]}")/.." > /dev/null readonly deployment_target_file="./resources/macos_deployment_target.txt" readonly deployment_target=$(cat ${deployment_target_file}) readonly gettext_version="0.20.1" +readonly pcre_version="8.43" +readonly xz_version="5.2.4" +readonly ag_version="2.2.0" +build_ag () { +pushd .deps > /dev/null + curl -L -o ag.tar.gz https://github.com/ggreer/the_silver_searcher/archive/${ag_version}.tar.gz + tar xf ag.tar.gz + mv the_silver_searcher-${ag_version} ag + + pushd ag > /dev/null + ./autogen.sh + + xz_include=$(pwd)/../../third-party/libxz/include + pcre_include=$(pwd)/../../third-party/libpcre/include + ./configure CFLAGS="-mmacosx-version-min=${deployment_target} -I${xz_include} -I${pcre_include}" \ + LDFLAGS="-L$(pwd)/../../third-party/libxz/lib -L$(pwd)/../../third-party/libpcre/lib" \ + MACOSX_DEPLOYMENT_TARGET=${deployment_target} + pushd src > /dev/null + cc -c ignore.c log.c options.c print.c scandir.c search.c lang.c util.c decompress.c zfile.c + ar -crs libag.a ignore.o log.o options.o print.o scandir.o search.o lang.o util.o decompress.o zfile.o + mkdir -p $(pwd)/../../../third-party/libag/lib + mv libag.a $(pwd)/../../../third-party/libag/lib + + mkdir -p $(pwd)/../../../third-party/libag/include + cp *.h $(pwd)/../../../third-party/libag/include + popd > /dev/null + popd > /dev/null +popd > /dev/null +} + +build_xz () { +pushd .deps > /dev/null + curl -L -o xz.tar.gz https://tukaani.org/xz/xz-${xz_version}.tar.gz + tar xf xz.tar.gz + mv xz-${xz_version} xz + + pushd xz > /dev/null + # configure from https://github.com/Homebrew/homebrew-core/blob/c9882801013d6bc5202b91ef56ff5838d18bbab2/Formula/xz.rb + ./configure CFLAGS="-mmacosx-version-min=${deployment_target}" MACOSX_DEPLOYMENT_TARGET=${deployment_target} \ + --disable-debug \ + --disable-dependency-tracking \ + --disable-silent-rules \ + --prefix=$(pwd)/../../third-party/libxz + make + make install + rm -rf $(pwd)/../../third-party/libxz/bin + rm -rf $(pwd)/../../third-party/libxz/share + popd > /dev/null +popd > /dev/null +} + +build_pcre () { +pushd .deps > /dev/null + curl -L -o pcre.tar.bz2 https://ftp.pcre.org/pub/pcre/pcre-${pcre_version}.tar.bz2 + tar xf pcre.tar.bz2 + mv pcre-${pcre_version} pcre + + pushd pcre > /dev/null + # configure from https://github.com/Homebrew/homebrew-core/blob/c9882801013d6bc5202b91ef56ff5838d18bbab2/Formula/pcre.rb + ./configure CFLAGS="-mmacosx-version-min=${deployment_target}" MACOSX_DEPLOYMENT_TARGET=${deployment_target} \ + --disable-dependency-tracking \ + --prefix=$(pwd)/../../third-party/libpcre \ + --enable-utf8 \ + --enable-pcre8 \ + --enable-pcre16 \ + --enable-pcre32 \ + --enable-unicode-properties \ + --enable-pcregrep-libz \ + --enable-pcregrep-libbz2 \ + --enable-jit + make + make install + rm -rf $(pwd)/../../third-party/libpcre/bin + rm -rf $(pwd)/../../third-party/libpcre/share + popd > /dev/null +popd > /dev/null +} + +build_vimr_deps () { +rm -rf third-party +mkdir third-party + +rm -rf .deps +mkdir .deps + +build_pcre +build_xz +build_ag +} + +build_gettext () { pushd NvimView > /dev/null + mkdir -p third-party/libintl rm -rf .deps mkdir .deps pushd .deps > /dev/null - curl -o gettext.tar.xz https://ftp.gnu.org/gnu/gettext/gettext-${gettext_version}.tar.xz + curl -L -o gettext.tar.xz https://ftp.gnu.org/gnu/gettext/gettext-${gettext_version}.tar.xz tar xf gettext.tar.xz mv gettext-${gettext_version} gettext @@ -52,3 +144,7 @@ popd > /dev/null popd > /dev/null echo "### Built deps" +} + +#build_gettext +build_vimr_deps diff --git a/third-party/libag/include/config.h b/third-party/libag/include/config.h new file mode 100644 index 00000000..dec29aea --- /dev/null +++ b/third-party/libag/include/config.h @@ -0,0 +1,120 @@ +/* src/config.h. Generated from config.h.in by configure. */ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + +/* Have dirent struct member d_namlen */ +#define HAVE_DIRENT_DNAMLEN /**/ + +/* Have dirent struct member d_type */ +#define HAVE_DIRENT_DTYPE /**/ + +/* Define to 1 if you have the header file. */ +#define HAVE_ERR_H 1 + +/* Define to 1 if you have the `fgetln' function. */ +#define HAVE_FGETLN 1 + +/* Define to 1 if you have the `fopencookie' function. */ +/* #undef HAVE_FOPENCOOKIE */ + +/* Define to 1 if you have the `getline' function. */ +#define HAVE_GETLINE 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `shlwapi' library (-lshlwapi). */ +/* #undef HAVE_LIBSHLWAPI */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LZMA_H 1 + +/* Define to 1 if you have the `madvise' function. */ +#define HAVE_MADVISE 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `pledge' function. */ +/* #undef HAVE_PLEDGE */ + +/* Define to 1 if you have the `posix_fadvise' function. */ +/* #undef HAVE_POSIX_FADVISE */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PTHREAD_H 1 + +/* Have PTHREAD_PRIO_INHERIT. */ +#define HAVE_PTHREAD_PRIO_INHERIT 1 + +/* Define to 1 if you have the `pthread_setaffinity_np' function. */ +/* #undef HAVE_PTHREAD_SETAFFINITY_NP */ + +/* Define to 1 if you have the `realpath' function. */ +#define HAVE_REALPATH 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strlcpy' function. */ +#define HAVE_STRLCPY 1 + +/* Define to 1 if you have the `strndup' function. */ +#define HAVE_STRNDUP 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_CPUSET_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the `vasprintf' function. */ +#define HAVE_VASPRINTF 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_ZLIB_H 1 + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "https://github.com/ggreer/the_silver_searcher/issues" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "the_silver_searcher" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "the_silver_searcher 2.2.0" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "the_silver_searcher" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "https://github.com/ggreer/the_silver_searcher" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.2.0" + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Use CPU_SET macros */ +/* #undef USE_CPU_SET */ + +/* Use PCRE JIT */ +#define USE_PCRE_JIT /**/ diff --git a/third-party/libag/include/decompress.h b/third-party/libag/include/decompress.h new file mode 100644 index 00000000..9c5592cf --- /dev/null +++ b/third-party/libag/include/decompress.h @@ -0,0 +1,26 @@ +#ifndef DECOMPRESS_H +#define DECOMPRESS_H + +#include + +#include "config.h" +#include "log.h" +#include "options.h" + +typedef enum { + AG_NO_COMPRESSION, + AG_GZIP, + AG_COMPRESS, + AG_ZIP, + AG_XZ, +} ag_compression_type; + +ag_compression_type is_zipped(const void *buf, const int buf_len); + +void *decompress(const ag_compression_type zip_type, const void *buf, const int buf_len, const char *dir_full_path, int *new_buf_len); + +#if HAVE_FOPENCOOKIE +FILE *decompress_open(int fd, const char *mode, ag_compression_type ctype); +#endif + +#endif diff --git a/third-party/libag/include/ignore.h b/third-party/libag/include/ignore.h new file mode 100644 index 00000000..20d5a6af --- /dev/null +++ b/third-party/libag/include/ignore.h @@ -0,0 +1,48 @@ +#ifndef IGNORE_H +#define IGNORE_H + +#include +#include + +struct ignores { + char **extensions; /* File extensions to ignore */ + size_t extensions_len; + + char **names; /* Non-regex ignore lines. Sorted so we can binary search them. */ + size_t names_len; + char **slash_names; /* Same but starts with a slash */ + size_t slash_names_len; + + char **regexes; /* For patterns that need fnmatch */ + size_t regexes_len; + char **invert_regexes; /* For "!" patterns */ + size_t invert_regexes_len; + char **slash_regexes; + size_t slash_regexes_len; + + const char *dirname; + size_t dirname_len; + char *abs_path; + size_t abs_path_len; + + struct ignores *parent; +}; +typedef struct ignores ignores; + +ignores *root_ignores; + +extern const char *evil_hardcoded_ignore_files[]; +extern const char *ignore_pattern_files[]; + +ignores *init_ignore(ignores *parent, const char *dirname, const size_t dirname_len); +void cleanup_ignore(ignores *ig); + +void add_ignore_pattern(ignores *ig, const char *pattern); + +void load_ignore_patterns(ignores *ig, const char *path); + +int filename_filter(const char *path, const struct dirent *dir, void *baton); + +int is_empty(ignores *ig); + +#endif diff --git a/third-party/libag/include/lang.h b/third-party/libag/include/lang.h new file mode 100644 index 00000000..d0007f94 --- /dev/null +++ b/third-party/libag/include/lang.h @@ -0,0 +1,36 @@ +#ifndef LANG_H +#define LANG_H + +#define MAX_EXTENSIONS 12 +#define SINGLE_EXT_LEN 20 + +typedef struct { + const char *name; + const char *extensions[MAX_EXTENSIONS]; +} lang_spec_t; + +extern lang_spec_t langs[]; + +/** + Return the language count. + */ +size_t get_lang_count(void); + +/** +Convert a NULL-terminated array of language extensions +into a regular expression of the form \.(extension1|extension2...)$ + +Caller is responsible for freeing the returned string. +*/ +char *make_lang_regex(char *ext_array, size_t num_exts); + + +/** +Combine multiple file type extensions into one array. + +The combined result is returned through *exts*; +*exts* is one-dimension array, which can contain up to 100 extensions; +The number of extensions that *exts* actually contain is returned. +*/ +size_t combine_file_extensions(size_t *extension_index, size_t len, char **exts); +#endif diff --git a/third-party/libag/include/log.h b/third-party/libag/include/log.h new file mode 100644 index 00000000..85847ee7 --- /dev/null +++ b/third-party/libag/include/log.h @@ -0,0 +1,32 @@ +#ifndef LOG_H +#define LOG_H + +#include + +#include "config.h" + +#ifdef HAVE_PTHREAD_H +#include +#endif + +pthread_mutex_t print_mtx; + +enum log_level { + LOG_LEVEL_DEBUG = 10, + LOG_LEVEL_MSG = 20, + LOG_LEVEL_WARN = 30, + LOG_LEVEL_ERR = 40, + LOG_LEVEL_NONE = 100 +}; + +void set_log_level(enum log_level threshold); + +void log_debug(const char *fmt, ...); +void log_msg(const char *fmt, ...); +void log_warn(const char *fmt, ...); +void log_err(const char *fmt, ...); + +void vplog(const unsigned int level, const char *fmt, va_list args); +void plog(const unsigned int level, const char *fmt, ...); + +#endif diff --git a/third-party/libag/include/options.h b/third-party/libag/include/options.h new file mode 100644 index 00000000..db3e8961 --- /dev/null +++ b/third-party/libag/include/options.h @@ -0,0 +1,105 @@ +#ifndef OPTIONS_H +#define OPTIONS_H + +#include +#include + +#include + +#define DEFAULT_AFTER_LEN 2 +#define DEFAULT_BEFORE_LEN 2 +#define DEFAULT_CONTEXT_LEN 2 +#define DEFAULT_MAX_SEARCH_DEPTH 25 +enum case_behavior { + CASE_DEFAULT, /* Changes to CASE_SMART at the end of option parsing */ + CASE_SENSITIVE, + CASE_INSENSITIVE, + CASE_SMART, + CASE_SENSITIVE_RETRY_INSENSITIVE /* for future use */ +}; + +enum path_print_behavior { + PATH_PRINT_DEFAULT, /* PRINT_TOP if > 1 file being searched, else PRINT_NOTHING */ + PATH_PRINT_DEFAULT_EACH_LINE, /* PRINT_EACH_LINE if > 1 file being searched, else PRINT_NOTHING */ + PATH_PRINT_TOP, + PATH_PRINT_EACH_LINE, + PATH_PRINT_NOTHING +}; + +typedef struct { + int ackmate; + pcre *ackmate_dir_filter; + pcre_extra *ackmate_dir_filter_extra; + size_t after; + size_t before; + enum case_behavior casing; + const char *file_search_string; + int match_files; + pcre *file_search_regex; + pcre_extra *file_search_regex_extra; + int color; + char *color_line_number; + char *color_match; + char *color_path; + int color_win_ansi; + int column; + int context; + int follow_symlinks; + int invert_match; + int literal; + int literal_starts_wordchar; + int literal_ends_wordchar; + size_t max_matches_per_file; + int max_search_depth; + int mmap; + int multiline; + int one_dev; + int only_matching; + char path_sep; + int path_to_ignore; + int print_break; + int print_count; + int print_filename_only; + int print_path; + int print_all_paths; + int print_line_numbers; + int print_long_lines; /* TODO: support this in print.c */ + int passthrough; + pcre *re; + pcre_extra *re_extra; + int recurse_dirs; + int search_all_files; + int skip_vcs_ignores; + int search_binary_files; + int search_zip_files; + int search_hidden_files; + int search_stream; /* true if tail -F blah | ag */ + int stats; + size_t stream_line_num; /* This should totally not be in here */ + int match_found; /* This should totally not be in here */ + ino_t stdout_inode; + char *query; + int query_len; + char *pager; + int paths_len; + int parallel; + int use_thread_affinity; + int vimgrep; + size_t width; + int word_regexp; + int workers; +} cli_options; + +/* global options. parse_options gives it sane values, everything else reads from it */ +cli_options opts; + +typedef struct option option_t; + +void usage(void); +void print_version(void); + +void init_options(void); +void parse_options(int argc, char **argv, char **base_paths[], char **paths[]); +void cleanup_options(void); + +#endif diff --git a/third-party/libag/include/print.h b/third-party/libag/include/print.h new file mode 100644 index 00000000..3f21dfc3 --- /dev/null +++ b/third-party/libag/include/print.h @@ -0,0 +1,26 @@ +#ifndef PRINT_H +#define PRINT_H + +#include "util.h" + +void print_init_context(void); +void print_cleanup_context(void); +void print_context_append(const char *line, size_t len); +void print_trailing_context(const char *path, const char *buf, size_t n); +void print_path(const char *path, const char sep); +void print_path_count(const char *path, const char sep, const size_t count); +void print_line(const char *buf, size_t buf_pos, size_t prev_line_offset); +void print_binary_file_matches(const char *path); +void print_file_matches(const char *path, const char *buf, const size_t buf_len, const match_t matches[], const size_t matches_len); +void print_line_number(size_t line, const char sep); +void print_column_number(const match_t matches[], size_t last_printed_match, + size_t prev_line_offset, const char sep); +void print_file_separator(void); +const char *normalize_path(const char *path); + +#ifdef _WIN32 +void windows_use_ansi(int use_ansi); +int fprintf_w32(FILE *fp, const char *format, ...); +#endif + +#endif diff --git a/third-party/libag/include/scandir.h b/third-party/libag/include/scandir.h new file mode 100644 index 00000000..6ebcf988 --- /dev/null +++ b/third-party/libag/include/scandir.h @@ -0,0 +1,20 @@ +#ifndef SCANDIR_H +#define SCANDIR_H + +#include "ignore.h" + +typedef struct { + const ignores *ig; + const char *base_path; + size_t base_path_len; + const char *path_start; +} scandir_baton_t; + +typedef int (*filter_fp)(const char *path, const struct dirent *, void *); + +int ag_scandir(const char *dirname, + struct dirent ***namelist, + filter_fp filter, + void *baton); + +#endif diff --git a/third-party/libag/include/search.h b/third-party/libag/include/search.h new file mode 100644 index 00000000..10711149 --- /dev/null +++ b/third-party/libag/include/search.h @@ -0,0 +1,78 @@ +#ifndef SEARCH_H +#define SEARCH_H + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include +#include + +#include "config.h" + +#ifdef HAVE_PTHREAD_H +#include +#endif + +#include "decompress.h" +#include "ignore.h" +#include "log.h" +#include "options.h" +#include "print.h" +#include "uthash.h" +#include "util.h" + +size_t alpha_skip_lookup[256]; +size_t *find_skip_lookup; +uint8_t h_table[H_SIZE] __attribute__((aligned(64))); + +struct work_queue_t { + char *path; + struct work_queue_t *next; +}; +typedef struct work_queue_t work_queue_t; + +work_queue_t *work_queue; +work_queue_t *work_queue_tail; +int done_adding_files; +pthread_cond_t files_ready; +pthread_mutex_t stats_mtx; +pthread_mutex_t work_queue_mtx; + + +/* For symlink loop detection */ +#define SYMLOOP_ERROR (-1) +#define SYMLOOP_OK (0) +#define SYMLOOP_LOOP (1) + +typedef struct { + dev_t dev; + ino_t ino; +} dirkey_t; + +typedef struct { + dirkey_t key; + UT_hash_handle hh; +} symdir_t; + +symdir_t *symhash; + +void search_buf(const char *buf, const size_t buf_len, + const char *dir_full_path); +void search_stream(FILE *stream, const char *path); +void search_file(const char *file_full_path); + +void *search_file_worker(void *i); + +void search_dir(ignores *ig, const char *base_path, const char *path, const int depth, dev_t original_dev); + +#endif diff --git a/third-party/libag/include/uthash.h b/third-party/libag/include/uthash.h new file mode 100644 index 00000000..9e7fefd0 --- /dev/null +++ b/third-party/libag/include/uthash.h @@ -0,0 +1,971 @@ +/* +Copyright (c) 2003-2014, Troy D. Hanson http://troydhanson.github.com/uthash/ +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#include /* ptrdiff_t */ +#include /* exit() */ +#include /* memcmp,strlen */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__) +#define NO_DECLTYPE +#define DECLTYPE(x) +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst, src) \ + do { \ + char **_da_dst = (char **)(&(dst)); \ + *_da_dst = (char *)(src); \ + } while (0) +#else +#define DECLTYPE_ASSIGN(dst, src) \ + do { \ + (dst) = DECLTYPE(dst)(src); \ + } while (0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ +#if defined(_WIN32) +#if defined(_MSC_VER) && _MSC_VER >= 1600 +#include +#elif defined(__WATCOMC__) +#include +#else +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#endif +#else +#include +#endif + +#define UTHASH_VERSION 1.9.9 + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#endif +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef uthash_free +#define uthash_free(ptr, sz) free(ptr) /* free fcn */ +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhe */ +#define ELMT_FROM_HH(tbl, hhp) ((void *)(((char *)(hhp)) - ((tbl)->hho))) + +#define HASH_FIND(hh, head, keyptr, keylen, out) \ + do { \ + unsigned _hf_bkt, _hf_hashv; \ + out = NULL; \ + if (head) { \ + HASH_FCN(keyptr, keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[_hf_bkt], keyptr, keylen, out); \ + } \ + } \ + } while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN / 8) + ((HASH_BLOOM_BITLEN % 8) ? 1 : 0) +#define HASH_BLOOM_MAKE(tbl) \ + do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t *)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { \ + uthash_fatal("out of memory"); \ + } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ + } while (0) + +#define HASH_BLOOM_FREE(tbl) \ + do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ + } while (0) + +#define HASH_BLOOM_BITSET(bv, idx) (bv[(idx) / 8] |= (1U << ((idx) % 8))) +#define HASH_BLOOM_BITTEST(bv, idx) (bv[(idx) / 8] & (1U << ((idx) % 8))) + +#define HASH_BLOOM_ADD(tbl, hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#define HASH_BLOOM_TEST(tbl, hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl, hashv) +#define HASH_BLOOM_TEST(tbl, hashv) (1) +#define HASH_BLOOM_BYTELEN 0 +#endif + +#define HASH_MAKE_TABLE(hh, head) \ + do { \ + (head)->hh.tbl = (UT_hash_table *)uthash_malloc(sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { \ + uthash_fatal("out of memory"); \ + } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char *)(&(head)->hh) - (char *)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket *)uthash_malloc(HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + if (!(head)->hh.tbl->buckets) { \ + uthash_fatal("out of memory"); \ + } \ + memset((head)->hh.tbl->buckets, 0, HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ + } while (0) + +#define HASH_ADD(hh, head, fieldname, keylen_in, add) \ + HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) + +#define HASH_REPLACE(hh, head, fieldname, keylen_in, add, replaced) \ + do { \ + replaced = NULL; \ + HASH_FIND(hh, head, &((add)->fieldname), keylen_in, replaced); \ + if (replaced != NULL) { \ + HASH_DELETE(hh, head, replaced); \ + }; \ + HASH_ADD(hh, head, fieldname, keylen_in, add); \ + } while (0) + +#define HASH_ADD_KEYPTR(hh, head, keyptr, keylen_in, add) \ + do { \ + unsigned _ha_bkt; \ + (add)->hh.next = NULL; \ + (add)->hh.key = (char *)(keyptr); \ + (add)->hh.keylen = (unsigned)(keylen_in); \ + if (!(head)) { \ + head = (add); \ + (head)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh, head); \ + } else { \ + (head)->hh.tbl->tail->next = (add); \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } \ + (head)->hh.tbl->num_items++; \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_FCN(keyptr, keylen_in, (head)->hh.tbl->num_buckets, (add)->hh.hashv, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl, (add)->hh.hashv); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + HASH_FSCK(hh, head); \ + } while (0) + +#define HASH_TO_BKT(hashv, num_bkts, bkt) \ + do { \ + bkt = ((hashv) & (num_bkts - 1)); \ + } while (0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. + */ +#define HASH_DELETE(hh, head, delptr) \ + do { \ + unsigned _hd_bkt; \ + struct UT_hash_handle *_hd_hh_del; \ + if (((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL)) { \ + uthash_free((head)->hh.tbl->buckets, (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + head = NULL; \ + } else { \ + _hd_hh_del = &((delptr)->hh); \ + if ((delptr) == ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail)) { \ + (head)->hh.tbl->tail = (UT_hash_handle *)((ptrdiff_t)((delptr)->hh.prev) + (head)->hh.tbl->hho); \ + } \ + if ((delptr)->hh.prev) { \ + ((UT_hash_handle *)((ptrdiff_t)((delptr)->hh.prev) + (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ + } else { \ + DECLTYPE_ASSIGN(head, (delptr)->hh.next); \ + } \ + if (_hd_hh_del->next) { \ + ((UT_hash_handle *)((ptrdiff_t)_hd_hh_del->next + (head)->hh.tbl->hho))->prev = _hd_hh_del->prev; \ + } \ + HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT(hh, (head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh, head); \ + } while (0) + + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head, findstr, out) \ + HASH_FIND(hh, head, findstr, strlen(findstr), out) +#define HASH_ADD_STR(head, strfield, add) \ + HASH_ADD(hh, head, strfield[0], strlen(add->strfield), add) +#define HASH_REPLACE_STR(head, strfield, add, replaced) \ + HASH_REPLACE(hh, head, strfield[0], strlen(add->strfield), add, replaced) +#define HASH_FIND_INT(head, findint, out) \ + HASH_FIND(hh, head, findint, sizeof(int), out) +#define HASH_ADD_INT(head, intfield, add) \ + HASH_ADD(hh, head, intfield, sizeof(int), add) +#define HASH_REPLACE_INT(head, intfield, add, replaced) \ + HASH_REPLACE(hh, head, intfield, sizeof(int), add, replaced) +#define HASH_FIND_PTR(head, findptr, out) \ + HASH_FIND(hh, head, findptr, sizeof(void *), out) +#define HASH_ADD_PTR(head, ptrfield, add) \ + HASH_ADD(hh, head, ptrfield, sizeof(void *), add) +#define HASH_REPLACE_PTR(head, ptrfield, add, replaced) \ + HASH_REPLACE(hh, head, ptrfield, sizeof(void *), add, replaced) +#define HASH_DEL(head, delptr) \ + HASH_DELETE(hh, head, delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#define HASH_OOPS(...) \ + do { \ + fprintf(stderr, __VA_ARGS__); \ + exit(-1); \ + } while (0) +#define HASH_FSCK(hh, head) \ + do { \ + unsigned _bkt_i; \ + unsigned _count, _bkt_count; \ + char *_prev; \ + struct UT_hash_handle *_thh; \ + if (head) { \ + _count = 0; \ + for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ + _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char *)(_thh->hh_prev)) { \ + HASH_OOPS("invalid hh_prev %p, actual %p\n", _thh->hh_prev, _prev); \ + } \ + _bkt_count++; \ + _prev = (char *)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("invalid bucket count %d, actual %d\n", (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid hh item count %d, actual %d\n", (head)->hh.tbl->num_items, _count); \ + } \ + /* traverse hh in app order; check next/prev integrity, count */ \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev != (char *)(_thh->prev)) { \ + HASH_OOPS("invalid prev %p, actual %p\n", _thh->prev, _prev); \ + } \ + _prev = (char *)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = (_thh->next ? (UT_hash_handle *)((char *)(_thh->next) + (head)->hh.tbl->hho) : NULL); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid app item count %d, actual %d\n", (head)->hh.tbl->num_items, _count); \ + } \ + } \ + } while (0) +#else +#define HASH_FSCK(hh, head) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen) \ + do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, fieldlen); \ + } while (0) +#else +#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen) +#endif + +/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ +#ifdef HASH_FUNCTION +#define HASH_FCN HASH_FUNCTION +#else +#define HASH_FCN HASH_JEN +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */ +#define HASH_BER(key, keylen, num_bkts, hashv, bkt) \ + do { \ + unsigned _hb_keylen = keylen; \ + char *_hb_key = (char *)(key); \ + (hashv) = 0; \ + while (_hb_keylen--) { \ + (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ + } \ + bkt = (hashv) & (num_bkts - 1); \ + } while (0) + + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ +#define HASH_SAX(key, keylen, num_bkts, hashv, bkt) \ + do { \ + unsigned _sx_i; \ + char *_hs_key = (char *)(key); \ + hashv = 0; \ + for (_sx_i = 0; _sx_i < keylen; _sx_i++) \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + bkt = hashv & (num_bkts - 1); \ + } while (0) +/* FNV-1a variation */ +#define HASH_FNV(key, keylen, num_bkts, hashv, bkt) \ + do { \ + unsigned _fn_i; \ + char *_hf_key = (char *)(key); \ + hashv = 2166136261UL; \ + for (_fn_i = 0; _fn_i < keylen; _fn_i++) \ + hashv = hashv ^ _hf_key[_fn_i]; \ + hashv = hashv * 16777619; \ + bkt = hashv & (num_bkts - 1); \ + } while (0) + +#define HASH_OAT(key, keylen, num_bkts, hashv, bkt) \ + do { \ + unsigned _ho_i; \ + char *_ho_key = (char *)(key); \ + hashv = 0; \ + for (_ho_i = 0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ + bkt = hashv & (num_bkts - 1); \ + } while (0) + +#define HASH_JEN_MIX(a, b, c) \ + do { \ + a -= b; \ + a -= c; \ + a ^= (c >> 13); \ + b -= c; \ + b -= a; \ + b ^= (a << 8); \ + c -= a; \ + c -= b; \ + c ^= (b >> 13); \ + a -= b; \ + a -= c; \ + a ^= (c >> 12); \ + b -= c; \ + b -= a; \ + b ^= (a << 16); \ + c -= a; \ + c -= b; \ + c ^= (b >> 5); \ + a -= b; \ + a -= c; \ + a ^= (c >> 3); \ + b -= c; \ + b -= a; \ + b ^= (a << 10); \ + c -= a; \ + c -= b; \ + c ^= (b >> 15); \ + } while (0) + +#define HASH_JEN(key, keylen, num_bkts, hashv, bkt) \ + do { \ + unsigned _hj_i, _hj_j, _hj_k; \ + unsigned char *_hj_key = (unsigned char *)(key); \ + hashv = 0xfeedbeef; \ + _hj_i = _hj_j = 0x9e3779b9; \ + _hj_k = (unsigned)(keylen); \ + while (_hj_k >= 12) { \ + _hj_i += (_hj_key[0] + ((unsigned)_hj_key[1] << 8) + ((unsigned)_hj_key[2] << 16) + ((unsigned)_hj_key[3] << 24)); \ + _hj_j += (_hj_key[4] + ((unsigned)_hj_key[5] << 8) + ((unsigned)_hj_key[6] << 16) + ((unsigned)_hj_key[7] << 24)); \ + hashv += (_hj_key[8] + ((unsigned)_hj_key[9] << 8) + ((unsigned)_hj_key[10] << 16) + ((unsigned)_hj_key[11] << 24)); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12; \ + } \ + hashv += keylen; \ + switch (_hj_k) { \ + case 11: \ + hashv += ((unsigned)_hj_key[10] << 24); \ + /* fall through */ \ + case 10: \ + hashv += ((unsigned)_hj_key[9] << 16); \ + /* fall through */ \ + case 9: \ + hashv += ((unsigned)_hj_key[8] << 8); \ + /* fall through */ \ + case 8: \ + _hj_j += ((unsigned)_hj_key[7] << 24); \ + /* fall through */ \ + case 7: \ + _hj_j += ((unsigned)_hj_key[6] << 16); \ + /* fall through */ \ + case 6: \ + _hj_j += ((unsigned)_hj_key[5] << 8); \ + /* fall through */ \ + case 5: \ + _hj_j += _hj_key[4]; \ + /* fall through */ \ + case 4: \ + _hj_i += ((unsigned)_hj_key[3] << 24); \ + /* fall through */ \ + case 3: \ + _hj_i += ((unsigned)_hj_key[2] << 16); \ + /* fall through */ \ + case 2: \ + _hj_i += ((unsigned)_hj_key[1] << 8); \ + /* fall through */ \ + case 1: \ + _hj_i += _hj_key[0]; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + bkt = hashv & (num_bkts - 1); \ + } while (0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) || defined(_MSC_VER) || defined(__BORLANDC__) || defined(__TURBOC__) +#define get16bits(d) (*((const uint16_t *)(d))) +#endif + +#if !defined(get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) + (uint32_t)(((const uint8_t *)(d))[0])) +#endif +#define HASH_SFH(key, keylen, num_bkts, hashv, bkt) \ + do { \ + unsigned char *_sfh_key = (unsigned char *)(key); \ + uint32_t _sfh_tmp, _sfh_len = keylen; \ + \ + int _sfh_rem = _sfh_len & 3; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabe; \ + \ + /* Main loop */ \ + for (; _sfh_len > 0; _sfh_len--) { \ + hashv += get16bits(_sfh_key); \ + _sfh_tmp = (uint32_t)(get16bits(_sfh_key + 2)) << 11 ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2 * sizeof(uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: \ + hashv += get16bits(_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof(uint16_t)] << 18); \ + hashv += hashv >> 11; \ + break; \ + case 2: \ + hashv += get16bits(_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: \ + hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ + bkt = hashv & (num_bkts - 1); \ + } while (0) + +#ifdef HASH_USING_NO_STRICT_ALIASING +/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. + * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. + * MurmurHash uses the faster approach only on CPU's where we know it's safe. + * + * Note the preprocessor built-in defines can be emitted using: + * + * gcc -m64 -dM -E - < /dev/null (on gcc) + * cc -## a.c (where a.c is a simple test file) (Sun Studio) + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) +#define MUR_GETBLOCK(p, i) p[i] +#else /* non intel */ +#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0) +#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1) +#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2) +#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3) +#define WP(p) ((uint32_t *)((unsigned long)(p) & ~3UL)) +#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) +#define MUR_THREE_ONE(p) ((((*WP(p)) & 0x00ffffff) << 8) | (((*(WP(p) + 1)) & 0xff000000) >> 24)) +#define MUR_TWO_TWO(p) ((((*WP(p)) & 0x0000ffff) << 16) | (((*(WP(p) + 1)) & 0xffff0000) >> 16)) +#define MUR_ONE_THREE(p) ((((*WP(p)) & 0x000000ff) << 24) | (((*(WP(p) + 1)) & 0xffffff00) >> 8)) +#else /* assume little endian non-intel */ +#define MUR_THREE_ONE(p) ((((*WP(p)) & 0xffffff00) >> 8) | (((*(WP(p) + 1)) & 0x000000ff) << 24)) +#define MUR_TWO_TWO(p) ((((*WP(p)) & 0xffff0000) >> 16) | (((*(WP(p) + 1)) & 0x0000ffff) << 16)) +#define MUR_ONE_THREE(p) ((((*WP(p)) & 0xff000000) >> 24) | (((*(WP(p) + 1)) & 0x00ffffff) << 8)) +#endif +#define MUR_GETBLOCK(p, i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : MUR_ONE_THREE(p)))) +#endif +#define MUR_ROTL32(x, r) (((x) << (r)) | ((x) >> (32 - (r)))) +#define MUR_FMIX(_h) \ + do { \ + _h ^= _h >> 16; \ + _h *= 0x85ebca6b; \ + _h ^= _h >> 13; \ + _h *= 0xc2b2ae35l; \ + _h ^= _h >> 16; \ + } while (0) + +#define HASH_MUR(key, keylen, num_bkts, hashv, bkt) \ + do { \ + const uint8_t *_mur_data = (const uint8_t *)(key); \ + const int _mur_nblocks = (keylen) / 4; \ + uint32_t _mur_h1 = 0xf88D5353; \ + uint32_t _mur_c1 = 0xcc9e2d51; \ + uint32_t _mur_c2 = 0x1b873593; \ + uint32_t _mur_k1 = 0; \ + const uint8_t *_mur_tail; \ + const uint32_t *_mur_blocks = (const uint32_t *)(_mur_data + _mur_nblocks * 4); \ + int _mur_i; \ + for (_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \ + _mur_k1 = MUR_GETBLOCK(_mur_blocks, _mur_i); \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1, 15); \ + _mur_k1 *= _mur_c2; \ + \ + _mur_h1 ^= _mur_k1; \ + _mur_h1 = MUR_ROTL32(_mur_h1, 13); \ + _mur_h1 = _mur_h1 * 5 + 0xe6546b64; \ + } \ + _mur_tail = (const uint8_t *)(_mur_data + _mur_nblocks * 4); \ + _mur_k1 = 0; \ + switch (keylen & 3) { \ + case 3: \ + _mur_k1 ^= _mur_tail[2] << 16; \ + case 2: \ + _mur_k1 ^= _mur_tail[1] << 8; \ + case 1: \ + _mur_k1 ^= _mur_tail[0]; \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1, 15); \ + _mur_k1 *= _mur_c2; \ + _mur_h1 ^= _mur_k1; \ + } \ + _mur_h1 ^= (keylen); \ + MUR_FMIX(_mur_h1); \ + hashv = _mur_h1; \ + bkt = hashv & (num_bkts - 1); \ + } while (0) +#endif /* HASH_USING_NO_STRICT_ALIASING */ + +/* key comparison function; return 0 if keys equal */ +#define HASH_KEYCMP(a, b, len) memcmp(a, b, len) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl, hh, head, keyptr, keylen_in, out) \ + do { \ + if (head.hh_head) \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, head.hh_head)); \ + else \ + out = NULL; \ + while (out) { \ + if ((out)->hh.keylen == keylen_in) { \ + if ((HASH_KEYCMP((out)->hh.key, keyptr, keylen_in)) == 0) \ + break; \ + } \ + if ((out)->hh.hh_next) \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ + else \ + out = NULL; \ + } \ + } while (0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head, addhh) \ + do { \ + head.count++; \ + (addhh)->hh_next = head.hh_head; \ + (addhh)->hh_prev = NULL; \ + if (head.hh_head) { \ + (head).hh_head->hh_prev = (addhh); \ + } \ + (head).hh_head = addhh; \ + if (head.count >= ((head.expand_mult + 1) * HASH_BKT_CAPACITY_THRESH) && (addhh)->tbl->noexpand != 1) { \ + HASH_EXPAND_BUCKETS((addhh)->tbl); \ + } \ + } while (0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(hh, head, hh_del) \ + (head).count--; \ + if ((head).hh_head == hh_del) { \ + (head).hh_head = hh_del->hh_next; \ + } \ + if (hh_del->hh_prev) { \ + hh_del->hh_prev->hh_next = hh_del->hh_next; \ + } \ + if (hh_del->hh_next) { \ + hh_del->hh_next->hh_prev = hh_del->hh_prev; \ + } + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(tbl) \ + do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket *)uthash_malloc(2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + if (!_he_new_buckets) { \ + uthash_fatal("out of memory"); \ + } \ + memset(_he_new_buckets, 0, 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->ideal_chain_maxlen = (tbl->num_items >> (tbl->log2_num_buckets + 1)) + ((tbl->num_items & ((tbl->num_buckets * 2) - 1)) ? 1 : 0); \ + tbl->nonideal_items = 0; \ + for (_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) { \ + _he_thh = tbl->buckets[_he_bkt_i].hh_head; \ + while (_he_thh) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT(_he_thh->hashv, tbl->num_buckets * 2, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[_he_bkt]); \ + if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ + tbl->nonideal_items++; \ + _he_newbkt->expand_mult = _he_newbkt->count / tbl->ideal_chain_maxlen; \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head) \ + _he_newbkt->hh_head->hh_prev = _he_thh; \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free(tbl->buckets, tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->num_buckets *= 2; \ + tbl->log2_num_buckets++; \ + tbl->buckets = _he_new_buckets; \ + tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? (tbl->ineff_expands + 1) : 0; \ + if (tbl->ineff_expands > 1) { \ + tbl->noexpand = 1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ + } while (0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. */ +#define HASH_SORT(head, cmpfcn) HASH_SRT(hh, head, cmpfcn) +#define HASH_SRT(hh, head, cmpfcn) \ + do { \ + unsigned _hs_i; \ + unsigned _hs_looping, _hs_nmerges, _hs_insize, _hs_psize, _hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for (_hs_i = 0; _hs_i < _hs_insize; _hs_i++) { \ + _hs_psize++; \ + _hs_q = (UT_hash_handle *)((_hs_q->next) ? ((void *)((char *)(_hs_q->next) + (head)->hh.tbl->hho)) : NULL); \ + if (!(_hs_q)) \ + break; \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q)) { \ + if (_hs_psize == 0) { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle *)((_hs_q->next) ? ((void *)((char *)(_hs_q->next) + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } else if ((_hs_qsize == 0) || !(_hs_q)) { \ + _hs_e = _hs_p; \ + if (_hs_p) { \ + _hs_p = (UT_hash_handle *)((_hs_p->next) ? ((void *)((char *)(_hs_p->next) + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else if ((cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_p)), DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_q)))) <= 0) { \ + _hs_e = _hs_p; \ + if (_hs_p) { \ + _hs_p = (UT_hash_handle *)((_hs_p->next) ? ((void *)((char *)(_hs_p->next) + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle *)((_hs_q->next) ? ((void *)((char *)(_hs_q->next) + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } \ + if (_hs_tail) { \ + _hs_tail->next = ((_hs_e) ? ELMT_FROM_HH((head)->hh.tbl, _hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e) { \ + _hs_e->prev = ((_hs_tail) ? ELMT_FROM_HH((head)->hh.tbl, _hs_tail) : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail) { \ + _hs_tail->next = NULL; \ + } \ + if (_hs_nmerges <= 1) { \ + _hs_looping = 0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head, ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2; \ + } \ + HASH_FSCK(hh, head); \ + } \ + } while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. */ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ + do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt = NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh = NULL; \ + ptrdiff_t _dst_hho = ((char *)(&(dst)->hh_dst) - (char *)(dst)); \ + if (src) { \ + for (_src_bkt = 0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; _src_hh; _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + _dst_hh = (UT_hash_handle *)(((char *)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh) { \ + _last_elt_hh->next = _elt; \ + } \ + if (!dst) { \ + DECLTYPE_ASSIGN(dst, _elt); \ + HASH_MAKE_TABLE(hh_dst, dst); \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], _dst_hh); \ + (dst)->hh_dst.tbl->num_items++; \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst, dst); \ + } while (0) + +#define HASH_CLEAR(hh, head) \ + do { \ + if (head) { \ + uthash_free((head)->hh.tbl->buckets, (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } \ + } while (0) + +#define HASH_OVERHEAD(hh, head) \ + (size_t)((((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + (sizeof(UT_hash_table)) + \ + (HASH_BLOOM_BYTELEN))) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh, head, el, tmp) for ((el) = (head), (*(char **)(&(tmp))) = (char *)((head) ? (head)->hh.next : NULL); \ + el; (el) = (tmp), (*(char **)(&(tmp))) = (char *)((tmp) ? (tmp)->hh.next : NULL)) +#else +#define HASH_ITER(hh, head, el, tmp) for ((el) = (head), (tmp) = DECLTYPE(el)((head) ? (head)->hh.next : NULL); \ + el; (el) = (tmp), (tmp) = DECLTYPE(el)((tmp) ? (tmp)->hh.next : NULL)) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh, head) +#define HASH_CNT(hh, head) ((head) ? ((head)->hh.tbl->num_items) : 0) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1 +#define HASH_BLOOM_SIGNATURE 0xb12220f2 + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + char bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ diff --git a/third-party/libag/include/util.h b/third-party/libag/include/util.h new file mode 100644 index 00000000..0c9b9b11 --- /dev/null +++ b/third-party/libag/include/util.h @@ -0,0 +1,119 @@ +#ifndef UTIL_H +#define UTIL_H + +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "log.h" +#include "options.h" + +FILE *out_fd; + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#define H_SIZE (64 * 1024) + +#ifdef __clang__ +#define NO_SANITIZE_ALIGNMENT __attribute__((no_sanitize("alignment"))) +#else +#define NO_SANITIZE_ALIGNMENT +#endif + +void *ag_malloc(size_t size); +void *ag_realloc(void *ptr, size_t size); +void *ag_calloc(size_t nelem, size_t elsize); +char *ag_strdup(const char *s); +char *ag_strndup(const char *s, size_t size); + +typedef struct { + size_t start; /* Byte at which the match starts */ + size_t end; /* and where it ends */ +} match_t; + +typedef struct { + size_t total_bytes; + size_t total_files; + size_t total_matches; + size_t total_file_matches; + struct timeval time_start; + struct timeval time_end; +} ag_stats; + + +ag_stats stats; + +/* Union to translate between chars and words without violating strict aliasing */ +typedef union { + char as_chars[sizeof(uint16_t)]; + uint16_t as_word; +} word_t; + +void free_strings(char **strs, const size_t strs_len); + +void generate_alpha_skip(const char *find, size_t f_len, size_t skip_lookup[], const int case_sensitive); +int is_prefix(const char *s, const size_t s_len, const size_t pos, const int case_sensitive); +size_t suffix_len(const char *s, const size_t s_len, const size_t pos, const int case_sensitive); +void generate_find_skip(const char *find, const size_t f_len, size_t **skip_lookup, const int case_sensitive); +void generate_hash(const char *find, const size_t f_len, uint8_t *H, const int case_sensitive); + +/* max is already defined on spec-violating compilers such as MinGW */ +size_t ag_max(size_t a, size_t b); +size_t ag_min(size_t a, size_t b); + +const char *boyer_moore_strnstr(const char *s, const char *find, const size_t s_len, const size_t f_len, + const size_t alpha_skip_lookup[], const size_t *find_skip_lookup, const int case_insensitive); +const char *hash_strnstr(const char *s, const char *find, const size_t s_len, const size_t f_len, uint8_t *h_table, const int case_sensitive); + +size_t invert_matches(const char *buf, const size_t buf_len, match_t matches[], size_t matches_len); +void realloc_matches(match_t **matches, size_t *matches_size, size_t matches_len); +void compile_study(pcre **re, pcre_extra **re_extra, char *q, const int pcre_opts, const int study_opts); + + +int is_binary(const void *buf, const size_t buf_len); +int is_regex(const char *query); +int is_fnmatch(const char *filename); +int binary_search(const char *needle, char **haystack, int start, int end); + +void init_wordchar_table(void); +int is_wordchar(char ch); + +int is_lowercase(const char *s); + +int is_directory(const char *path, const struct dirent *d); +int is_symlink(const char *path, const struct dirent *d); +int is_named_pipe(const char *path, const struct dirent *d); + +void die(const char *fmt, ...); + +void ag_asprintf(char **ret, const char *fmt, ...); + +ssize_t buf_getline(const char **line, const char *buf, const size_t buf_len, const size_t buf_offset); + +#ifndef HAVE_FGETLN +char *fgetln(FILE *fp, size_t *lenp); +#endif +#ifndef HAVE_GETLINE +ssize_t getline(char **lineptr, size_t *n, FILE *stream); +#endif +#ifndef HAVE_REALPATH +char *realpath(const char *path, char *resolved_path); +#endif +#ifndef HAVE_STRLCPY +size_t strlcpy(char *dest, const char *src, size_t size); +#endif +#ifndef HAVE_VASPRINTF +int vasprintf(char **ret, const char *fmt, va_list args); +#endif + +#endif diff --git a/third-party/libag/lib/libag.a b/third-party/libag/lib/libag.a new file mode 100644 index 00000000..b3ce6556 --- /dev/null +++ b/third-party/libag/lib/libag.a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28799bb5b02366a972f5550c5408d238a28d01fce18e1ec015232a818a24cd0b +size 118440 diff --git a/third-party/libpcre/include/pcre.h b/third-party/libpcre/include/pcre.h new file mode 100644 index 00000000..b578eb11 --- /dev/null +++ b/third-party/libpcre/include/pcre.h @@ -0,0 +1,677 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* This is the public header file for the PCRE library, to be #included by +applications that call the PCRE functions. + + Copyright (c) 1997-2014 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef _PCRE_H +#define _PCRE_H + +/* The current PCRE version information. */ + +#define PCRE_MAJOR 8 +#define PCRE_MINOR 43 +#define PCRE_PRERELEASE +#define PCRE_DATE 2019-02-23 + +/* When an application links to a PCRE DLL in Windows, the symbols that are +imported have to be identified as such. When building PCRE, the appropriate +export setting is defined in pcre_internal.h, which includes this file. So we +don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */ + +#if defined(_WIN32) && !defined(PCRE_STATIC) +# ifndef PCRE_EXP_DECL +# define PCRE_EXP_DECL extern __declspec(dllimport) +# endif +# ifdef __cplusplus +# ifndef PCRECPP_EXP_DECL +# define PCRECPP_EXP_DECL extern __declspec(dllimport) +# endif +# ifndef PCRECPP_EXP_DEFN +# define PCRECPP_EXP_DEFN __declspec(dllimport) +# endif +# endif +#endif + +/* By default, we use the standard "extern" declarations. */ + +#ifndef PCRE_EXP_DECL +# ifdef __cplusplus +# define PCRE_EXP_DECL extern "C" +# else +# define PCRE_EXP_DECL extern +# endif +#endif + +#ifdef __cplusplus +# ifndef PCRECPP_EXP_DECL +# define PCRECPP_EXP_DECL extern +# endif +# ifndef PCRECPP_EXP_DEFN +# define PCRECPP_EXP_DEFN +# endif +#endif + +/* Have to include stdlib.h in order to ensure that size_t is defined; +it is needed here for malloc. */ + +#include + +/* Allow for C++ users */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Public options. Some are compile-time only, some are run-time only, and some +are both. Most of the compile-time options are saved with the compiled regex so +that they can be inspected during studying (and therefore JIT compiling). Note +that pcre_study() has its own set of options. Originally, all the options +defined here used distinct bits. However, almost all the bits in a 32-bit word +are now used, so in order to conserve them, option bits that were previously +only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may +also be used for compile-time options that affect only compiling and are not +relevant for studying or JIT compiling. + +Some options for pcre_compile() change its behaviour but do not affect the +behaviour of the execution functions. Other options are passed through to the +execution functions and affect their behaviour, with or without affecting the +behaviour of pcre_compile(). + +Options that can be passed to pcre_compile() are tagged Cx below, with these +variants: + +C1 Affects compile only +C2 Does not affect compile; affects exec, dfa_exec +C3 Affects compile, exec, dfa_exec +C4 Affects compile, exec, dfa_exec, study +C5 Affects compile, exec, study + +Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged with +E and D, respectively. They take precedence over C3, C4, and C5 settings passed +from pcre_compile(). Those that are compatible with JIT execution are flagged +with J. */ + +#define PCRE_CASELESS 0x00000001 /* C1 */ +#define PCRE_MULTILINE 0x00000002 /* C1 */ +#define PCRE_DOTALL 0x00000004 /* C1 */ +#define PCRE_EXTENDED 0x00000008 /* C1 */ +#define PCRE_ANCHORED 0x00000010 /* C4 E D */ +#define PCRE_DOLLAR_ENDONLY 0x00000020 /* C2 */ +#define PCRE_EXTRA 0x00000040 /* C1 */ +#define PCRE_NOTBOL 0x00000080 /* E D J */ +#define PCRE_NOTEOL 0x00000100 /* E D J */ +#define PCRE_UNGREEDY 0x00000200 /* C1 */ +#define PCRE_NOTEMPTY 0x00000400 /* E D J */ +#define PCRE_UTF8 0x00000800 /* C4 ) */ +#define PCRE_UTF16 0x00000800 /* C4 ) Synonyms */ +#define PCRE_UTF32 0x00000800 /* C4 ) */ +#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* C1 */ +#define PCRE_NO_UTF8_CHECK 0x00002000 /* C1 E D J ) */ +#define PCRE_NO_UTF16_CHECK 0x00002000 /* C1 E D J ) Synonyms */ +#define PCRE_NO_UTF32_CHECK 0x00002000 /* C1 E D J ) */ +#define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */ +#define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */ +#define PCRE_PARTIAL 0x00008000 /* E D J ) */ + +/* This pair use the same bit. */ +#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */ +#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */ + +/* This pair use the same bit. */ +#define PCRE_NO_AUTO_POSSESS 0x00020000 /* C1 ) Overlaid */ +#define PCRE_DFA_RESTART 0x00020000 /* D ) Overlaid */ + +#define PCRE_FIRSTLINE 0x00040000 /* C3 */ +#define PCRE_DUPNAMES 0x00080000 /* C1 */ +#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */ +#define PCRE_NEWLINE_LF 0x00200000 /* C3 E D */ +#define PCRE_NEWLINE_CRLF 0x00300000 /* C3 E D */ +#define PCRE_NEWLINE_ANY 0x00400000 /* C3 E D */ +#define PCRE_NEWLINE_ANYCRLF 0x00500000 /* C3 E D */ +#define PCRE_BSR_ANYCRLF 0x00800000 /* C3 E D */ +#define PCRE_BSR_UNICODE 0x01000000 /* C3 E D */ +#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* C5 */ +#define PCRE_NO_START_OPTIMIZE 0x04000000 /* C2 E D ) Synonyms */ +#define PCRE_NO_START_OPTIMISE 0x04000000 /* C2 E D ) */ +#define PCRE_PARTIAL_HARD 0x08000000 /* E D J */ +#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* E D J */ +#define PCRE_UCP 0x20000000 /* C3 */ + +/* Exec-time and get/set-time error codes */ + +#define PCRE_ERROR_NOMATCH (-1) +#define PCRE_ERROR_NULL (-2) +#define PCRE_ERROR_BADOPTION (-3) +#define PCRE_ERROR_BADMAGIC (-4) +#define PCRE_ERROR_UNKNOWN_OPCODE (-5) +#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */ +#define PCRE_ERROR_NOMEMORY (-6) +#define PCRE_ERROR_NOSUBSTRING (-7) +#define PCRE_ERROR_MATCHLIMIT (-8) +#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */ +#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16/32 */ +#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16/32 */ +#define PCRE_ERROR_BADUTF32 (-10) /* Same for 8/16/32 */ +#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */ +#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */ +#define PCRE_ERROR_PARTIAL (-12) +#define PCRE_ERROR_BADPARTIAL (-13) +#define PCRE_ERROR_INTERNAL (-14) +#define PCRE_ERROR_BADCOUNT (-15) +#define PCRE_ERROR_DFA_UITEM (-16) +#define PCRE_ERROR_DFA_UCOND (-17) +#define PCRE_ERROR_DFA_UMLIMIT (-18) +#define PCRE_ERROR_DFA_WSSIZE (-19) +#define PCRE_ERROR_DFA_RECURSE (-20) +#define PCRE_ERROR_RECURSIONLIMIT (-21) +#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */ +#define PCRE_ERROR_BADNEWLINE (-23) +#define PCRE_ERROR_BADOFFSET (-24) +#define PCRE_ERROR_SHORTUTF8 (-25) +#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */ +#define PCRE_ERROR_RECURSELOOP (-26) +#define PCRE_ERROR_JIT_STACKLIMIT (-27) +#define PCRE_ERROR_BADMODE (-28) +#define PCRE_ERROR_BADENDIANNESS (-29) +#define PCRE_ERROR_DFA_BADRESTART (-30) +#define PCRE_ERROR_JIT_BADOPTION (-31) +#define PCRE_ERROR_BADLENGTH (-32) +#define PCRE_ERROR_UNSET (-33) + +/* Specific error codes for UTF-8 validity checks */ + +#define PCRE_UTF8_ERR0 0 +#define PCRE_UTF8_ERR1 1 +#define PCRE_UTF8_ERR2 2 +#define PCRE_UTF8_ERR3 3 +#define PCRE_UTF8_ERR4 4 +#define PCRE_UTF8_ERR5 5 +#define PCRE_UTF8_ERR6 6 +#define PCRE_UTF8_ERR7 7 +#define PCRE_UTF8_ERR8 8 +#define PCRE_UTF8_ERR9 9 +#define PCRE_UTF8_ERR10 10 +#define PCRE_UTF8_ERR11 11 +#define PCRE_UTF8_ERR12 12 +#define PCRE_UTF8_ERR13 13 +#define PCRE_UTF8_ERR14 14 +#define PCRE_UTF8_ERR15 15 +#define PCRE_UTF8_ERR16 16 +#define PCRE_UTF8_ERR17 17 +#define PCRE_UTF8_ERR18 18 +#define PCRE_UTF8_ERR19 19 +#define PCRE_UTF8_ERR20 20 +#define PCRE_UTF8_ERR21 21 +#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */ + +/* Specific error codes for UTF-16 validity checks */ + +#define PCRE_UTF16_ERR0 0 +#define PCRE_UTF16_ERR1 1 +#define PCRE_UTF16_ERR2 2 +#define PCRE_UTF16_ERR3 3 +#define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */ + +/* Specific error codes for UTF-32 validity checks */ + +#define PCRE_UTF32_ERR0 0 +#define PCRE_UTF32_ERR1 1 +#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */ +#define PCRE_UTF32_ERR3 3 + +/* Request types for pcre_fullinfo() */ + +#define PCRE_INFO_OPTIONS 0 +#define PCRE_INFO_SIZE 1 +#define PCRE_INFO_CAPTURECOUNT 2 +#define PCRE_INFO_BACKREFMAX 3 +#define PCRE_INFO_FIRSTBYTE 4 +#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */ +#define PCRE_INFO_FIRSTTABLE 5 +#define PCRE_INFO_LASTLITERAL 6 +#define PCRE_INFO_NAMEENTRYSIZE 7 +#define PCRE_INFO_NAMECOUNT 8 +#define PCRE_INFO_NAMETABLE 9 +#define PCRE_INFO_STUDYSIZE 10 +#define PCRE_INFO_DEFAULT_TABLES 11 +#define PCRE_INFO_OKPARTIAL 12 +#define PCRE_INFO_JCHANGED 13 +#define PCRE_INFO_HASCRORLF 14 +#define PCRE_INFO_MINLENGTH 15 +#define PCRE_INFO_JIT 16 +#define PCRE_INFO_JITSIZE 17 +#define PCRE_INFO_MAXLOOKBEHIND 18 +#define PCRE_INFO_FIRSTCHARACTER 19 +#define PCRE_INFO_FIRSTCHARACTERFLAGS 20 +#define PCRE_INFO_REQUIREDCHAR 21 +#define PCRE_INFO_REQUIREDCHARFLAGS 22 +#define PCRE_INFO_MATCHLIMIT 23 +#define PCRE_INFO_RECURSIONLIMIT 24 +#define PCRE_INFO_MATCH_EMPTY 25 + +/* Request types for pcre_config(). Do not re-arrange, in order to remain +compatible. */ + +#define PCRE_CONFIG_UTF8 0 +#define PCRE_CONFIG_NEWLINE 1 +#define PCRE_CONFIG_LINK_SIZE 2 +#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3 +#define PCRE_CONFIG_MATCH_LIMIT 4 +#define PCRE_CONFIG_STACKRECURSE 5 +#define PCRE_CONFIG_UNICODE_PROPERTIES 6 +#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7 +#define PCRE_CONFIG_BSR 8 +#define PCRE_CONFIG_JIT 9 +#define PCRE_CONFIG_UTF16 10 +#define PCRE_CONFIG_JITTARGET 11 +#define PCRE_CONFIG_UTF32 12 +#define PCRE_CONFIG_PARENS_LIMIT 13 + +/* Request types for pcre_study(). Do not re-arrange, in order to remain +compatible. */ + +#define PCRE_STUDY_JIT_COMPILE 0x0001 +#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002 +#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004 +#define PCRE_STUDY_EXTRA_NEEDED 0x0008 + +/* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine +these bits, just add new ones on the end, in order to remain compatible. */ + +#define PCRE_EXTRA_STUDY_DATA 0x0001 +#define PCRE_EXTRA_MATCH_LIMIT 0x0002 +#define PCRE_EXTRA_CALLOUT_DATA 0x0004 +#define PCRE_EXTRA_TABLES 0x0008 +#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010 +#define PCRE_EXTRA_MARK 0x0020 +#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040 + +/* Types */ + +struct real_pcre8_or_16; /* declaration; the definition is private */ +typedef struct real_pcre8_or_16 pcre; + +struct real_pcre8_or_16; /* declaration; the definition is private */ +typedef struct real_pcre8_or_16 pcre16; + +struct real_pcre32; /* declaration; the definition is private */ +typedef struct real_pcre32 pcre32; + +struct real_pcre_jit_stack; /* declaration; the definition is private */ +typedef struct real_pcre_jit_stack pcre_jit_stack; + +struct real_pcre16_jit_stack; /* declaration; the definition is private */ +typedef struct real_pcre16_jit_stack pcre16_jit_stack; + +struct real_pcre32_jit_stack; /* declaration; the definition is private */ +typedef struct real_pcre32_jit_stack pcre32_jit_stack; + +/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain +a 16 bit wide signed data type. Otherwise it can be a dummy data type since +pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */ +#ifndef PCRE_UCHAR16 +#define PCRE_UCHAR16 unsigned short +#endif + +#ifndef PCRE_SPTR16 +#define PCRE_SPTR16 const PCRE_UCHAR16 * +#endif + +/* If PCRE is compiled with 32 bit character support, PCRE_UCHAR32 must contain +a 32 bit wide signed data type. Otherwise it can be a dummy data type since +pcre32 functions are not implemented. There is a check for this in pcre_internal.h. */ +#ifndef PCRE_UCHAR32 +#define PCRE_UCHAR32 unsigned int +#endif + +#ifndef PCRE_SPTR32 +#define PCRE_SPTR32 const PCRE_UCHAR32 * +#endif + +/* When PCRE is compiled as a C++ library, the subject pointer type can be +replaced with a custom type. For conventional use, the public interface is a +const char *. */ + +#ifndef PCRE_SPTR +#define PCRE_SPTR const char * +#endif + +/* The structure for passing additional data to pcre_exec(). This is defined in +such as way as to be extensible. Always add new fields at the end, in order to +remain compatible. */ + +typedef struct pcre_extra { + unsigned long int flags; /* Bits for which fields are set */ + void *study_data; /* Opaque data from pcre_study() */ + unsigned long int match_limit; /* Maximum number of calls to match() */ + void *callout_data; /* Data passed back in callouts */ + const unsigned char *tables; /* Pointer to character tables */ + unsigned long int match_limit_recursion; /* Max recursive calls to match() */ + unsigned char **mark; /* For passing back a mark pointer */ + void *executable_jit; /* Contains a pointer to a compiled jit code */ +} pcre_extra; + +/* Same structure as above, but with 16 bit char pointers. */ + +typedef struct pcre16_extra { + unsigned long int flags; /* Bits for which fields are set */ + void *study_data; /* Opaque data from pcre_study() */ + unsigned long int match_limit; /* Maximum number of calls to match() */ + void *callout_data; /* Data passed back in callouts */ + const unsigned char *tables; /* Pointer to character tables */ + unsigned long int match_limit_recursion; /* Max recursive calls to match() */ + PCRE_UCHAR16 **mark; /* For passing back a mark pointer */ + void *executable_jit; /* Contains a pointer to a compiled jit code */ +} pcre16_extra; + +/* Same structure as above, but with 32 bit char pointers. */ + +typedef struct pcre32_extra { + unsigned long int flags; /* Bits for which fields are set */ + void *study_data; /* Opaque data from pcre_study() */ + unsigned long int match_limit; /* Maximum number of calls to match() */ + void *callout_data; /* Data passed back in callouts */ + const unsigned char *tables; /* Pointer to character tables */ + unsigned long int match_limit_recursion; /* Max recursive calls to match() */ + PCRE_UCHAR32 **mark; /* For passing back a mark pointer */ + void *executable_jit; /* Contains a pointer to a compiled jit code */ +} pcre32_extra; + +/* The structure for passing out data via the pcre_callout_function. We use a +structure so that new fields can be added on the end in future versions, +without changing the API of the function, thereby allowing old clients to work +without modification. */ + +typedef struct pcre_callout_block { + int version; /* Identifies version of block */ + /* ------------------------ Version 0 ------------------------------- */ + int callout_number; /* Number compiled into pattern */ + int *offset_vector; /* The offset vector */ + PCRE_SPTR subject; /* The subject being matched */ + int subject_length; /* The length of the subject */ + int start_match; /* Offset to start of this match attempt */ + int current_position; /* Where we currently are in the subject */ + int capture_top; /* Max current capture */ + int capture_last; /* Most recently closed capture */ + void *callout_data; /* Data passed in with the call */ + /* ------------------- Added for Version 1 -------------------------- */ + int pattern_position; /* Offset to next item in the pattern */ + int next_item_length; /* Length of next item in the pattern */ + /* ------------------- Added for Version 2 -------------------------- */ + const unsigned char *mark; /* Pointer to current mark or NULL */ + /* ------------------------------------------------------------------ */ +} pcre_callout_block; + +/* Same structure as above, but with 16 bit char pointers. */ + +typedef struct pcre16_callout_block { + int version; /* Identifies version of block */ + /* ------------------------ Version 0 ------------------------------- */ + int callout_number; /* Number compiled into pattern */ + int *offset_vector; /* The offset vector */ + PCRE_SPTR16 subject; /* The subject being matched */ + int subject_length; /* The length of the subject */ + int start_match; /* Offset to start of this match attempt */ + int current_position; /* Where we currently are in the subject */ + int capture_top; /* Max current capture */ + int capture_last; /* Most recently closed capture */ + void *callout_data; /* Data passed in with the call */ + /* ------------------- Added for Version 1 -------------------------- */ + int pattern_position; /* Offset to next item in the pattern */ + int next_item_length; /* Length of next item in the pattern */ + /* ------------------- Added for Version 2 -------------------------- */ + const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */ + /* ------------------------------------------------------------------ */ +} pcre16_callout_block; + +/* Same structure as above, but with 32 bit char pointers. */ + +typedef struct pcre32_callout_block { + int version; /* Identifies version of block */ + /* ------------------------ Version 0 ------------------------------- */ + int callout_number; /* Number compiled into pattern */ + int *offset_vector; /* The offset vector */ + PCRE_SPTR32 subject; /* The subject being matched */ + int subject_length; /* The length of the subject */ + int start_match; /* Offset to start of this match attempt */ + int current_position; /* Where we currently are in the subject */ + int capture_top; /* Max current capture */ + int capture_last; /* Most recently closed capture */ + void *callout_data; /* Data passed in with the call */ + /* ------------------- Added for Version 1 -------------------------- */ + int pattern_position; /* Offset to next item in the pattern */ + int next_item_length; /* Length of next item in the pattern */ + /* ------------------- Added for Version 2 -------------------------- */ + const PCRE_UCHAR32 *mark; /* Pointer to current mark or NULL */ + /* ------------------------------------------------------------------ */ +} pcre32_callout_block; + +/* Indirection for store get and free functions. These can be set to +alternative malloc/free functions if required. Special ones are used in the +non-recursive case for "frames". There is also an optional callout function +that is triggered by the (?) regex item. For Virtual Pascal, these definitions +have to take another form. */ + +#ifndef VPCOMPAT +PCRE_EXP_DECL void *(*pcre_malloc)(size_t); +PCRE_EXP_DECL void (*pcre_free)(void *); +PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t); +PCRE_EXP_DECL void (*pcre_stack_free)(void *); +PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *); +PCRE_EXP_DECL int (*pcre_stack_guard)(void); + +PCRE_EXP_DECL void *(*pcre16_malloc)(size_t); +PCRE_EXP_DECL void (*pcre16_free)(void *); +PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t); +PCRE_EXP_DECL void (*pcre16_stack_free)(void *); +PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *); +PCRE_EXP_DECL int (*pcre16_stack_guard)(void); + +PCRE_EXP_DECL void *(*pcre32_malloc)(size_t); +PCRE_EXP_DECL void (*pcre32_free)(void *); +PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t); +PCRE_EXP_DECL void (*pcre32_stack_free)(void *); +PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *); +PCRE_EXP_DECL int (*pcre32_stack_guard)(void); +#else /* VPCOMPAT */ +PCRE_EXP_DECL void *pcre_malloc(size_t); +PCRE_EXP_DECL void pcre_free(void *); +PCRE_EXP_DECL void *pcre_stack_malloc(size_t); +PCRE_EXP_DECL void pcre_stack_free(void *); +PCRE_EXP_DECL int pcre_callout(pcre_callout_block *); +PCRE_EXP_DECL int pcre_stack_guard(void); + +PCRE_EXP_DECL void *pcre16_malloc(size_t); +PCRE_EXP_DECL void pcre16_free(void *); +PCRE_EXP_DECL void *pcre16_stack_malloc(size_t); +PCRE_EXP_DECL void pcre16_stack_free(void *); +PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *); +PCRE_EXP_DECL int pcre16_stack_guard(void); + +PCRE_EXP_DECL void *pcre32_malloc(size_t); +PCRE_EXP_DECL void pcre32_free(void *); +PCRE_EXP_DECL void *pcre32_stack_malloc(size_t); +PCRE_EXP_DECL void pcre32_stack_free(void *); +PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *); +PCRE_EXP_DECL int pcre32_stack_guard(void); +#endif /* VPCOMPAT */ + +/* User defined callback which provides a stack just before the match starts. */ + +typedef pcre_jit_stack *(*pcre_jit_callback)(void *); +typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *); +typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *); + +/* Exported PCRE functions */ + +PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *, + const unsigned char *); +PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *, + const unsigned char *); +PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *, + const unsigned char *); +PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **, + int *, const unsigned char *); +PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **, + int *, const unsigned char *); +PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **, + int *, const unsigned char *); +PCRE_EXP_DECL int pcre_config(int, void *); +PCRE_EXP_DECL int pcre16_config(int, void *); +PCRE_EXP_DECL int pcre32_config(int, void *); +PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *, + int *, int, const char *, char *, int); +PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16, + int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int); +PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32, + int *, int, PCRE_SPTR32, PCRE_UCHAR32 *, int); +PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, + char *, int); +PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int, + PCRE_UCHAR16 *, int); +PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int, + PCRE_UCHAR32 *, int); +PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *, + const char *, int, int, int, int *, int , int *, int); +PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *, + PCRE_SPTR16, int, int, int, int *, int , int *, int); +PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *, + PCRE_SPTR32, int, int, int, int *, int , int *, int); +PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR, + int, int, int, int *, int); +PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *, + PCRE_SPTR16, int, int, int, int *, int); +PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *, + PCRE_SPTR32, int, int, int, int *, int); +PCRE_EXP_DECL int pcre_jit_exec(const pcre *, const pcre_extra *, + PCRE_SPTR, int, int, int, int *, int, + pcre_jit_stack *); +PCRE_EXP_DECL int pcre16_jit_exec(const pcre16 *, const pcre16_extra *, + PCRE_SPTR16, int, int, int, int *, int, + pcre16_jit_stack *); +PCRE_EXP_DECL int pcre32_jit_exec(const pcre32 *, const pcre32_extra *, + PCRE_SPTR32, int, int, int, int *, int, + pcre32_jit_stack *); +PCRE_EXP_DECL void pcre_free_substring(const char *); +PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16); +PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32); +PCRE_EXP_DECL void pcre_free_substring_list(const char **); +PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *); +PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *); +PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int, + void *); +PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int, + void *); +PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int, + void *); +PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *, + int *, int, const char *, const char **); +PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16, + int *, int, PCRE_SPTR16, PCRE_SPTR16 *); +PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32, + int *, int, PCRE_SPTR32, PCRE_SPTR32 *); +PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *); +PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16); +PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32); +PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *, + char **, char **); +PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16, + PCRE_UCHAR16 **, PCRE_UCHAR16 **); +PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32, + PCRE_UCHAR32 **, PCRE_UCHAR32 **); +PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int, + const char **); +PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int, + PCRE_SPTR16 *); +PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int, + PCRE_SPTR32 *); +PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int, + const char ***); +PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int, + PCRE_SPTR16 **); +PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int, + PCRE_SPTR32 **); +PCRE_EXP_DECL const unsigned char *pcre_maketables(void); +PCRE_EXP_DECL const unsigned char *pcre16_maketables(void); +PCRE_EXP_DECL const unsigned char *pcre32_maketables(void); +PCRE_EXP_DECL int pcre_refcount(pcre *, int); +PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int); +PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int); +PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **); +PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **); +PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **); +PCRE_EXP_DECL void pcre_free_study(pcre_extra *); +PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *); +PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *); +PCRE_EXP_DECL const char *pcre_version(void); +PCRE_EXP_DECL const char *pcre16_version(void); +PCRE_EXP_DECL const char *pcre32_version(void); + +/* Utility functions for byte order swaps. */ +PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *, + const unsigned char *); +PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *, + const unsigned char *); +PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *, pcre32_extra *, + const unsigned char *); +PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *, + PCRE_SPTR16, int, int *, int); +PCRE_EXP_DECL int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *, + PCRE_SPTR32, int, int *, int); + +/* JIT compiler related functions. */ + +PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int); +PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int); +PCRE_EXP_DECL pcre32_jit_stack *pcre32_jit_stack_alloc(int, int); +PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *); +PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *); +PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *); +PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, + pcre_jit_callback, void *); +PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *, + pcre16_jit_callback, void *); +PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *, + pcre32_jit_callback, void *); +PCRE_EXP_DECL void pcre_jit_free_unused_memory(void); +PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void); +PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* End of pcre.h */ diff --git a/third-party/libpcre/include/pcre_scanner.h b/third-party/libpcre/include/pcre_scanner.h new file mode 100644 index 00000000..5617e451 --- /dev/null +++ b/third-party/libpcre/include/pcre_scanner.h @@ -0,0 +1,172 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat +// +// Regular-expression based scanner for parsing an input stream. +// +// Example 1: parse a sequence of "var = number" entries from input: +// +// Scanner scanner(input); +// string var; +// int number; +// scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter +// while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) { +// ...; +// } + +#ifndef _PCRE_SCANNER_H +#define _PCRE_SCANNER_H + +#include +#include +#include + +#include +#include + +namespace pcrecpp { + +class PCRECPP_EXP_DEFN Scanner { + public: + Scanner(); + explicit Scanner(const std::string& input); + ~Scanner(); + + // Return current line number. The returned line-number is + // one-based. I.e. it returns 1 + the number of consumed newlines. + // + // Note: this method may be slow. It may take time proportional to + // the size of the input. + int LineNumber() const; + + // Return the byte-offset that the scanner is looking in the + // input data; + int Offset() const; + + // Return true iff the start of the remaining input matches "re" + bool LookingAt(const RE& re) const; + + // Return true iff all of the following are true + // a. the start of the remaining input matches "re", + // b. if any arguments are supplied, matched sub-patterns can be + // parsed and stored into the arguments. + // If it returns true, it skips over the matched input and any + // following input that matches the "skip" regular expression. + bool Consume(const RE& re, + const Arg& arg0 = RE::no_arg, + const Arg& arg1 = RE::no_arg, + const Arg& arg2 = RE::no_arg + // TODO: Allow more arguments? + ); + + // Set the "skip" regular expression. If after consuming some data, + // a prefix of the input matches this RE, it is automatically + // skipped. For example, a programming language scanner would use + // a skip RE that matches white space and comments. + // + // scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/"); + // + // Skipping repeats as long as it succeeds. We used to let people do + // this by writing "(...)*" in the regular expression, but that added + // up to lots of recursive calls within the pcre library, so now we + // control repetition explicitly via the function call API. + // + // You can pass NULL for "re" if you do not want any data to be skipped. + void Skip(const char* re); // DEPRECATED; does *not* repeat + void SetSkipExpression(const char* re); + + // Temporarily pause "skip"ing. This + // Skip("Foo"); code ; DisableSkip(); code; EnableSkip() + // is similar to + // Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo"); + // but avoids creating/deleting new RE objects. + void DisableSkip(); + + // Reenable previously paused skipping. Any prefix of the input + // that matches the skip pattern is immediately dropped. + void EnableSkip(); + + /***** Special wrappers around SetSkip() for some common idioms *****/ + + // Arranges to skip whitespace, C comments, C++ comments. + // The overall RE is a disjunction of the following REs: + // \\s whitespace + // //.*\n C++ comment + // /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x) + // We get repetition via the semantics of SetSkipExpression, not by using * + void SkipCXXComments() { + SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/"); + } + + void set_save_comments(bool comments) { + save_comments_ = comments; + } + + bool save_comments() { + return save_comments_; + } + + // Append to vector ranges the comments found in the + // byte range [start,end] (inclusive) of the input data. + // Only comments that were extracted entirely within that + // range are returned: no range splitting of atomically-extracted + // comments is performed. + void GetComments(int start, int end, std::vector *ranges); + + // Append to vector ranges the comments added + // since the last time this was called. This + // functionality is provided for efficiency when + // interleaving scanning with parsing. + void GetNextComments(std::vector *ranges); + + private: + std::string data_; // All the input data + StringPiece input_; // Unprocessed input + RE* skip_; // If non-NULL, RE for skipping input + bool should_skip_; // If true, use skip_ + bool skip_repeat_; // If true, repeat skip_ as long as it works + bool save_comments_; // If true, aggregate the skip expression + + // the skipped comments + // TODO: later consider requiring that the StringPieces be added + // in order by their start position + std::vector *comments_; + + // the offset into comments_ that has been returned by GetNextComments + int comments_offset_; + + // helper function to consume *skip_ and honour + // save_comments_ + void ConsumeSkip(); +}; + +} // namespace pcrecpp + +#endif /* _PCRE_SCANNER_H */ diff --git a/third-party/libpcre/include/pcre_stringpiece.h b/third-party/libpcre/include/pcre_stringpiece.h new file mode 100644 index 00000000..cb94f52a --- /dev/null +++ b/third-party/libpcre/include/pcre_stringpiece.h @@ -0,0 +1,180 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat +// +// A string like object that points into another piece of memory. +// Useful for providing an interface that allows clients to easily +// pass in either a "const char*" or a "string". +// +// Arghh! I wish C++ literals were automatically of type "string". + +#ifndef _PCRE_STRINGPIECE_H +#define _PCRE_STRINGPIECE_H + +#include +#include +#include // for ostream forward-declaration + +#if 0 +#define HAVE_TYPE_TRAITS +#include +#elif 0 +#define HAVE_TYPE_TRAITS +#include +#endif + +#include + +namespace pcrecpp { + +using std::memcmp; +using std::strlen; +using std::string; + +class PCRECPP_EXP_DEFN StringPiece { + private: + const char* ptr_; + int length_; + + public: + // We provide non-explicit singleton constructors so users can pass + // in a "const char*" or a "string" wherever a "StringPiece" is + // expected. + StringPiece() + : ptr_(NULL), length_(0) { } + StringPiece(const char* str) + : ptr_(str), length_(static_cast(strlen(ptr_))) { } + StringPiece(const unsigned char* str) + : ptr_(reinterpret_cast(str)), + length_(static_cast(strlen(ptr_))) { } + StringPiece(const string& str) + : ptr_(str.data()), length_(static_cast(str.size())) { } + StringPiece(const char* offset, int len) + : ptr_(offset), length_(len) { } + + // data() may return a pointer to a buffer with embedded NULs, and the + // returned buffer may or may not be null terminated. Therefore it is + // typically a mistake to pass data() to a routine that expects a NUL + // terminated string. Use "as_string().c_str()" if you really need to do + // this. Or better yet, change your routine so it does not rely on NUL + // termination. + const char* data() const { return ptr_; } + int size() const { return length_; } + bool empty() const { return length_ == 0; } + + void clear() { ptr_ = NULL; length_ = 0; } + void set(const char* buffer, int len) { ptr_ = buffer; length_ = len; } + void set(const char* str) { + ptr_ = str; + length_ = static_cast(strlen(str)); + } + void set(const void* buffer, int len) { + ptr_ = reinterpret_cast(buffer); + length_ = len; + } + + char operator[](int i) const { return ptr_[i]; } + + void remove_prefix(int n) { + ptr_ += n; + length_ -= n; + } + + void remove_suffix(int n) { + length_ -= n; + } + + bool operator==(const StringPiece& x) const { + return ((length_ == x.length_) && + (memcmp(ptr_, x.ptr_, length_) == 0)); + } + bool operator!=(const StringPiece& x) const { + return !(*this == x); + } + +#define STRINGPIECE_BINARY_PREDICATE(cmp,auxcmp) \ + bool operator cmp (const StringPiece& x) const { \ + int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_); \ + return ((r auxcmp 0) || ((r == 0) && (length_ cmp x.length_))); \ + } + STRINGPIECE_BINARY_PREDICATE(<, <); + STRINGPIECE_BINARY_PREDICATE(<=, <); + STRINGPIECE_BINARY_PREDICATE(>=, >); + STRINGPIECE_BINARY_PREDICATE(>, >); +#undef STRINGPIECE_BINARY_PREDICATE + + int compare(const StringPiece& x) const { + int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_); + if (r == 0) { + if (length_ < x.length_) r = -1; + else if (length_ > x.length_) r = +1; + } + return r; + } + + string as_string() const { + return string(data(), size()); + } + + void CopyToString(string* target) const { + target->assign(ptr_, length_); + } + + // Does "this" start with "x" + bool starts_with(const StringPiece& x) const { + return ((length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0)); + } +}; + +} // namespace pcrecpp + +// ------------------------------------------------------------------ +// Functions used to create STL containers that use StringPiece +// Remember that a StringPiece's lifetime had better be less than +// that of the underlying string or char*. If it is not, then you +// cannot safely store a StringPiece into an STL container +// ------------------------------------------------------------------ + +#ifdef HAVE_TYPE_TRAITS +// This makes vector really fast for some STL implementations +template<> struct __type_traits { + typedef __true_type has_trivial_default_constructor; + typedef __true_type has_trivial_copy_constructor; + typedef __true_type has_trivial_assignment_operator; + typedef __true_type has_trivial_destructor; + typedef __true_type is_POD_type; +}; +#endif + +// allow StringPiece to be logged +PCRECPP_EXP_DECL std::ostream& operator<<(std::ostream& o, + const pcrecpp::StringPiece& piece); + +#endif /* _PCRE_STRINGPIECE_H */ diff --git a/third-party/libpcre/include/pcrecpp.h b/third-party/libpcre/include/pcrecpp.h new file mode 100644 index 00000000..3e594b0d --- /dev/null +++ b/third-party/libpcre/include/pcrecpp.h @@ -0,0 +1,710 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat +// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005 + +#ifndef _PCRECPP_H +#define _PCRECPP_H + +// C++ interface to the pcre regular-expression library. RE supports +// Perl-style regular expressions (with extensions like \d, \w, \s, +// ...). +// +// ----------------------------------------------------------------------- +// REGEXP SYNTAX: +// +// This module is part of the pcre library and hence supports its syntax +// for regular expressions. +// +// The syntax is pretty similar to Perl's. For those not familiar +// with Perl's regular expressions, here are some examples of the most +// commonly used extensions: +// +// "hello (\\w+) world" -- \w matches a "word" character +// "version (\\d+)" -- \d matches a digit +// "hello\\s+world" -- \s matches any whitespace character +// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary +// "(?i)hello" -- (?i) turns on case-insensitive matching +// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible +// +// ----------------------------------------------------------------------- +// MATCHING INTERFACE: +// +// The "FullMatch" operation checks that supplied text matches a +// supplied pattern exactly. +// +// Example: successful match +// pcrecpp::RE re("h.*o"); +// re.FullMatch("hello"); +// +// Example: unsuccessful match (requires full match): +// pcrecpp::RE re("e"); +// !re.FullMatch("hello"); +// +// Example: creating a temporary RE object: +// pcrecpp::RE("h.*o").FullMatch("hello"); +// +// You can pass in a "const char*" or a "string" for "text". The +// examples below tend to use a const char*. +// +// You can, as in the different examples above, store the RE object +// explicitly in a variable or use a temporary RE object. The +// examples below use one mode or the other arbitrarily. Either +// could correctly be used for any of these examples. +// +// ----------------------------------------------------------------------- +// MATCHING WITH SUB-STRING EXTRACTION: +// +// You can supply extra pointer arguments to extract matched subpieces. +// +// Example: extracts "ruby" into "s" and 1234 into "i" +// int i; +// string s; +// pcrecpp::RE re("(\\w+):(\\d+)"); +// re.FullMatch("ruby:1234", &s, &i); +// +// Example: does not try to extract any extra sub-patterns +// re.FullMatch("ruby:1234", &s); +// +// Example: does not try to extract into NULL +// re.FullMatch("ruby:1234", NULL, &i); +// +// Example: integer overflow causes failure +// !re.FullMatch("ruby:1234567891234", NULL, &i); +// +// Example: fails because there aren't enough sub-patterns: +// !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s); +// +// Example: fails because string cannot be stored in integer +// !pcrecpp::RE("(.*)").FullMatch("ruby", &i); +// +// The provided pointer arguments can be pointers to any scalar numeric +// type, or one of +// string (matched piece is copied to string) +// StringPiece (StringPiece is mutated to point to matched piece) +// T (where "bool T::ParseFrom(const char*, int)" exists) +// NULL (the corresponding matched sub-pattern is not copied) +// +// CAVEAT: An optional sub-pattern that does not exist in the matched +// string is assigned the empty string. Therefore, the following will +// return false (because the empty string is not a valid number): +// int number; +// pcrecpp::RE::FullMatch("abc", "[a-z]+(\\d+)?", &number); +// +// ----------------------------------------------------------------------- +// DO_MATCH +// +// The matching interface supports at most 16 arguments per call. +// If you need more, consider using the more general interface +// pcrecpp::RE::DoMatch(). See pcrecpp.h for the signature for DoMatch. +// +// ----------------------------------------------------------------------- +// PARTIAL MATCHES +// +// You can use the "PartialMatch" operation when you want the pattern +// to match any substring of the text. +// +// Example: simple search for a string: +// pcrecpp::RE("ell").PartialMatch("hello"); +// +// Example: find first number in a string: +// int number; +// pcrecpp::RE re("(\\d+)"); +// re.PartialMatch("x*100 + 20", &number); +// assert(number == 100); +// +// ----------------------------------------------------------------------- +// UTF-8 AND THE MATCHING INTERFACE: +// +// By default, pattern and text are plain text, one byte per character. +// The UTF8 flag, passed to the constructor, causes both pattern +// and string to be treated as UTF-8 text, still a byte stream but +// potentially multiple bytes per character. In practice, the text +// is likelier to be UTF-8 than the pattern, but the match returned +// may depend on the UTF8 flag, so always use it when matching +// UTF8 text. E.g., "." will match one byte normally but with UTF8 +// set may match up to three bytes of a multi-byte character. +// +// Example: +// pcrecpp::RE_Options options; +// options.set_utf8(); +// pcrecpp::RE re(utf8_pattern, options); +// re.FullMatch(utf8_string); +// +// Example: using the convenience function UTF8(): +// pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8()); +// re.FullMatch(utf8_string); +// +// NOTE: The UTF8 option is ignored if pcre was not configured with the +// --enable-utf8 flag. +// +// ----------------------------------------------------------------------- +// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE +// +// PCRE defines some modifiers to change the behavior of the regular +// expression engine. +// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle +// to pass such modifiers to a RE class. +// +// Currently, the following modifiers are supported +// +// modifier description Perl corresponding +// +// PCRE_CASELESS case insensitive match /i +// PCRE_MULTILINE multiple lines match /m +// PCRE_DOTALL dot matches newlines /s +// PCRE_DOLLAR_ENDONLY $ matches only at end N/A +// PCRE_EXTRA strict escape parsing N/A +// PCRE_EXTENDED ignore whitespaces /x +// PCRE_UTF8 handles UTF8 chars built-in +// PCRE_UNGREEDY reverses * and *? N/A +// PCRE_NO_AUTO_CAPTURE disables matching parens N/A (*) +// +// (For a full account on how each modifier works, please check the +// PCRE API reference manual). +// +// (*) Both Perl and PCRE allow non matching parentheses by means of the +// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not +// capture, while (ab|cd) does. +// +// For each modifier, there are two member functions whose name is made +// out of the modifier in lowercase, without the "PCRE_" prefix. For +// instance, PCRE_CASELESS is handled by +// bool caseless(), +// which returns true if the modifier is set, and +// RE_Options & set_caseless(bool), +// which sets or unsets the modifier. +// +// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the +// set_match_limit() and match_limit() member functions. +// Setting match_limit to a non-zero value will limit the executation of +// pcre to keep it from doing bad things like blowing the stack or taking +// an eternity to return a result. A value of 5000 is good enough to stop +// stack blowup in a 2MB thread stack. Setting match_limit to zero will +// disable match limiting. Alternately, you can set match_limit_recursion() +// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre +// recurses. match_limit() caps the number of matches pcre does; +// match_limit_recrusion() caps the depth of recursion. +// +// Normally, to pass one or more modifiers to a RE class, you declare +// a RE_Options object, set the appropriate options, and pass this +// object to a RE constructor. Example: +// +// RE_options opt; +// opt.set_caseless(true); +// +// if (RE("HELLO", opt).PartialMatch("hello world")) ... +// +// RE_options has two constructors. The default constructor takes no +// arguments and creates a set of flags that are off by default. +// +// The optional parameter 'option_flags' is to facilitate transfer +// of legacy code from C programs. This lets you do +// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str); +// +// But new code is better off doing +// RE(pattern, +// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str); +// (See below) +// +// If you are going to pass one of the most used modifiers, there are some +// convenience functions that return a RE_Options class with the +// appropriate modifier already set: +// CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED() +// +// If you need to set several options at once, and you don't want to go +// through the pains of declaring a RE_Options object and setting several +// options, there is a parallel method that give you such ability on the +// fly. You can concatenate several set_xxxxx member functions, since each +// of them returns a reference to its class object. e.g.: to pass +// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one +// statement, you may write +// +// RE(" ^ xyz \\s+ .* blah$", RE_Options() +// .set_caseless(true) +// .set_extended(true) +// .set_multiline(true)).PartialMatch(sometext); +// +// ----------------------------------------------------------------------- +// SCANNING TEXT INCREMENTALLY +// +// The "Consume" operation may be useful if you want to repeatedly +// match regular expressions at the front of a string and skip over +// them as they match. This requires use of the "StringPiece" type, +// which represents a sub-range of a real string. Like RE, StringPiece +// is defined in the pcrecpp namespace. +// +// Example: read lines of the form "var = value" from a string. +// string contents = ...; // Fill string somehow +// pcrecpp::StringPiece input(contents); // Wrap in a StringPiece +// +// string var; +// int value; +// pcrecpp::RE re("(\\w+) = (\\d+)\n"); +// while (re.Consume(&input, &var, &value)) { +// ...; +// } +// +// Each successful call to "Consume" will set "var/value", and also +// advance "input" so it points past the matched text. +// +// The "FindAndConsume" operation is similar to "Consume" but does not +// anchor your match at the beginning of the string. For example, you +// could extract all words from a string by repeatedly calling +// pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word) +// +// ----------------------------------------------------------------------- +// PARSING HEX/OCTAL/C-RADIX NUMBERS +// +// By default, if you pass a pointer to a numeric value, the +// corresponding text is interpreted as a base-10 number. You can +// instead wrap the pointer with a call to one of the operators Hex(), +// Octal(), or CRadix() to interpret the text in another base. The +// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) +// prefixes, but defaults to base-10. +// +// Example: +// int a, b, c, d; +// pcrecpp::RE re("(.*) (.*) (.*) (.*)"); +// re.FullMatch("100 40 0100 0x40", +// pcrecpp::Octal(&a), pcrecpp::Hex(&b), +// pcrecpp::CRadix(&c), pcrecpp::CRadix(&d)); +// will leave 64 in a, b, c, and d. +// +// ----------------------------------------------------------------------- +// REPLACING PARTS OF STRINGS +// +// You can replace the first match of "pattern" in "str" with +// "rewrite". Within "rewrite", backslash-escaped digits (\1 to \9) +// can be used to insert text matching corresponding parenthesized +// group from the pattern. \0 in "rewrite" refers to the entire +// matching text. E.g., +// +// string s = "yabba dabba doo"; +// pcrecpp::RE("b+").Replace("d", &s); +// +// will leave "s" containing "yada dabba doo". The result is true if +// the pattern matches and a replacement occurs, or false otherwise. +// +// GlobalReplace() is like Replace(), except that it replaces all +// occurrences of the pattern in the string with the rewrite. +// Replacements are not subject to re-matching. E.g., +// +// string s = "yabba dabba doo"; +// pcrecpp::RE("b+").GlobalReplace("d", &s); +// +// will leave "s" containing "yada dada doo". It returns the number +// of replacements made. +// +// Extract() is like Replace(), except that if the pattern matches, +// "rewrite" is copied into "out" (an additional argument) with +// substitutions. The non-matching portions of "text" are ignored. +// Returns true iff a match occurred and the extraction happened +// successfully. If no match occurs, the string is left unaffected. + + +#include +#include +#include // defines the Arg class +// This isn't technically needed here, but we include it +// anyway so folks who include pcrecpp.h don't have to. +#include + +namespace pcrecpp { + +#define PCRE_SET_OR_CLEAR(b, o) \ + if (b) all_options_ |= (o); else all_options_ &= ~(o); \ + return *this + +#define PCRE_IS_SET(o) \ + (all_options_ & o) == o + +/***** Compiling regular expressions: the RE class *****/ + +// RE_Options allow you to set options to be passed along to pcre, +// along with other options we put on top of pcre. +// Only 9 modifiers, plus match_limit and match_limit_recursion, +// are supported now. +class PCRECPP_EXP_DEFN RE_Options { + public: + // constructor + RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {} + + // alternative constructor. + // To facilitate transfer of legacy code from C programs + // + // This lets you do + // RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str); + // But new code is better off doing + // RE(pattern, + // RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str); + RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0), + all_options_(option_flags) {} + // we're fine with the default destructor, copy constructor, etc. + + // accessors and mutators + int match_limit() const { return match_limit_; }; + RE_Options &set_match_limit(int limit) { + match_limit_ = limit; + return *this; + } + + int match_limit_recursion() const { return match_limit_recursion_; }; + RE_Options &set_match_limit_recursion(int limit) { + match_limit_recursion_ = limit; + return *this; + } + + bool caseless() const { + return PCRE_IS_SET(PCRE_CASELESS); + } + RE_Options &set_caseless(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_CASELESS); + } + + bool multiline() const { + return PCRE_IS_SET(PCRE_MULTILINE); + } + RE_Options &set_multiline(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE); + } + + bool dotall() const { + return PCRE_IS_SET(PCRE_DOTALL); + } + RE_Options &set_dotall(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_DOTALL); + } + + bool extended() const { + return PCRE_IS_SET(PCRE_EXTENDED); + } + RE_Options &set_extended(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED); + } + + bool dollar_endonly() const { + return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY); + } + RE_Options &set_dollar_endonly(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY); + } + + bool extra() const { + return PCRE_IS_SET(PCRE_EXTRA); + } + RE_Options &set_extra(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_EXTRA); + } + + bool ungreedy() const { + return PCRE_IS_SET(PCRE_UNGREEDY); + } + RE_Options &set_ungreedy(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY); + } + + bool utf8() const { + return PCRE_IS_SET(PCRE_UTF8); + } + RE_Options &set_utf8(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_UTF8); + } + + bool no_auto_capture() const { + return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE); + } + RE_Options &set_no_auto_capture(bool x) { + PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE); + } + + RE_Options &set_all_options(int opt) { + all_options_ = opt; + return *this; + } + int all_options() const { + return all_options_ ; + } + + // TODO: add other pcre flags + + private: + int match_limit_; + int match_limit_recursion_; + int all_options_; +}; + +// These functions return some common RE_Options +static inline RE_Options UTF8() { + return RE_Options().set_utf8(true); +} + +static inline RE_Options CASELESS() { + return RE_Options().set_caseless(true); +} +static inline RE_Options MULTILINE() { + return RE_Options().set_multiline(true); +} + +static inline RE_Options DOTALL() { + return RE_Options().set_dotall(true); +} + +static inline RE_Options EXTENDED() { + return RE_Options().set_extended(true); +} + +// Interface for regular expression matching. Also corresponds to a +// pre-compiled regular expression. An "RE" object is safe for +// concurrent use by multiple threads. +class PCRECPP_EXP_DEFN RE { + public: + // We provide implicit conversions from strings so that users can + // pass in a string or a "const char*" wherever an "RE" is expected. + RE(const string& pat) { Init(pat, NULL); } + RE(const string& pat, const RE_Options& option) { Init(pat, &option); } + RE(const char* pat) { Init(pat, NULL); } + RE(const char* pat, const RE_Options& option) { Init(pat, &option); } + RE(const unsigned char* pat) { + Init(reinterpret_cast(pat), NULL); + } + RE(const unsigned char* pat, const RE_Options& option) { + Init(reinterpret_cast(pat), &option); + } + + // Copy constructor & assignment - note that these are expensive + // because they recompile the expression. + RE(const RE& re) { Init(re.pattern_, &re.options_); } + const RE& operator=(const RE& re) { + if (this != &re) { + Cleanup(); + + // This is the code that originally came from Google + // Init(re.pattern_.c_str(), &re.options_); + + // This is the replacement from Ari Pollak + Init(re.pattern_, &re.options_); + } + return *this; + } + + + ~RE(); + + // The string specification for this RE. E.g. + // RE re("ab*c?d+"); + // re.pattern(); // "ab*c?d+" + const string& pattern() const { return pattern_; } + + // If RE could not be created properly, returns an error string. + // Else returns the empty string. + const string& error() const { return *error_; } + + /***** The useful part: the matching interface *****/ + + // This is provided so one can do pattern.ReplaceAll() just as + // easily as ReplaceAll(pattern-text, ....) + + bool FullMatch(const StringPiece& text, + const Arg& ptr1 = no_arg, + const Arg& ptr2 = no_arg, + const Arg& ptr3 = no_arg, + const Arg& ptr4 = no_arg, + const Arg& ptr5 = no_arg, + const Arg& ptr6 = no_arg, + const Arg& ptr7 = no_arg, + const Arg& ptr8 = no_arg, + const Arg& ptr9 = no_arg, + const Arg& ptr10 = no_arg, + const Arg& ptr11 = no_arg, + const Arg& ptr12 = no_arg, + const Arg& ptr13 = no_arg, + const Arg& ptr14 = no_arg, + const Arg& ptr15 = no_arg, + const Arg& ptr16 = no_arg) const; + + bool PartialMatch(const StringPiece& text, + const Arg& ptr1 = no_arg, + const Arg& ptr2 = no_arg, + const Arg& ptr3 = no_arg, + const Arg& ptr4 = no_arg, + const Arg& ptr5 = no_arg, + const Arg& ptr6 = no_arg, + const Arg& ptr7 = no_arg, + const Arg& ptr8 = no_arg, + const Arg& ptr9 = no_arg, + const Arg& ptr10 = no_arg, + const Arg& ptr11 = no_arg, + const Arg& ptr12 = no_arg, + const Arg& ptr13 = no_arg, + const Arg& ptr14 = no_arg, + const Arg& ptr15 = no_arg, + const Arg& ptr16 = no_arg) const; + + bool Consume(StringPiece* input, + const Arg& ptr1 = no_arg, + const Arg& ptr2 = no_arg, + const Arg& ptr3 = no_arg, + const Arg& ptr4 = no_arg, + const Arg& ptr5 = no_arg, + const Arg& ptr6 = no_arg, + const Arg& ptr7 = no_arg, + const Arg& ptr8 = no_arg, + const Arg& ptr9 = no_arg, + const Arg& ptr10 = no_arg, + const Arg& ptr11 = no_arg, + const Arg& ptr12 = no_arg, + const Arg& ptr13 = no_arg, + const Arg& ptr14 = no_arg, + const Arg& ptr15 = no_arg, + const Arg& ptr16 = no_arg) const; + + bool FindAndConsume(StringPiece* input, + const Arg& ptr1 = no_arg, + const Arg& ptr2 = no_arg, + const Arg& ptr3 = no_arg, + const Arg& ptr4 = no_arg, + const Arg& ptr5 = no_arg, + const Arg& ptr6 = no_arg, + const Arg& ptr7 = no_arg, + const Arg& ptr8 = no_arg, + const Arg& ptr9 = no_arg, + const Arg& ptr10 = no_arg, + const Arg& ptr11 = no_arg, + const Arg& ptr12 = no_arg, + const Arg& ptr13 = no_arg, + const Arg& ptr14 = no_arg, + const Arg& ptr15 = no_arg, + const Arg& ptr16 = no_arg) const; + + bool Replace(const StringPiece& rewrite, + string *str) const; + + int GlobalReplace(const StringPiece& rewrite, + string *str) const; + + bool Extract(const StringPiece &rewrite, + const StringPiece &text, + string *out) const; + + // Escapes all potentially meaningful regexp characters in + // 'unquoted'. The returned string, used as a regular expression, + // will exactly match the original string. For example, + // 1.5-2.0? + // may become: + // 1\.5\-2\.0\? + // Note QuoteMeta behaves the same as perl's QuoteMeta function, + // *except* that it escapes the NUL character (\0) as backslash + 0, + // rather than backslash + NUL. + static string QuoteMeta(const StringPiece& unquoted); + + + /***** Generic matching interface *****/ + + // Type of match (TODO: Should be restructured as part of RE_Options) + enum Anchor { + UNANCHORED, // No anchoring + ANCHOR_START, // Anchor at start only + ANCHOR_BOTH // Anchor at start and end + }; + + // General matching routine. Stores the length of the match in + // "*consumed" if successful. + bool DoMatch(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const* args, int n) const; + + // Return the number of capturing subpatterns, or -1 if the + // regexp wasn't valid on construction. + int NumberOfCapturingGroups() const; + + // The default value for an argument, to indicate the end of the argument + // list. This must be used only in optional argument defaults. It should NOT + // be passed explicitly. Some people have tried to use it like this: + // + // FullMatch(x, y, &z, no_arg, &w); + // + // This is a mistake, and will not work. + static Arg no_arg; + + private: + + void Init(const string& pattern, const RE_Options* options); + void Cleanup(); + + // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with + // pairs of integers for the beginning and end positions of matched + // text. The first pair corresponds to the entire matched text; + // subsequent pairs correspond, in order, to parentheses-captured + // matches. Returns the number of pairs (one more than the number of + // the last subpattern with a match) if matching was successful + // and zero if the match failed. + // I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching + // against "foo", "bar", and "baz" respectively. + // When matching RE("(foo)|hello") against "hello", it will return 1. + // But the values for all subpattern are filled in into "vec". + int TryMatch(const StringPiece& text, + int startpos, + Anchor anchor, + bool empty_ok, + int *vec, + int vecsize) const; + + // Append the "rewrite" string, with backslash subsitutions from "text" + // and "vec", to string "out". + bool Rewrite(string *out, + const StringPiece& rewrite, + const StringPiece& text, + int *vec, + int veclen) const; + + // internal implementation for DoMatch + bool DoMatchImpl(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const args[], + int n, + int* vec, + int vecsize) const; + + // Compile the regexp for the specified anchoring mode + pcre* Compile(Anchor anchor); + + string pattern_; + RE_Options options_; + pcre* re_full_; // For full matches + pcre* re_partial_; // For partial matches + const string* error_; // Error indicator (or points to empty string) +}; + +} // namespace pcrecpp + +#endif /* _PCRECPP_H */ diff --git a/third-party/libpcre/include/pcrecpparg.h b/third-party/libpcre/include/pcrecpparg.h new file mode 100644 index 00000000..b4f9c3f4 --- /dev/null +++ b/third-party/libpcre/include/pcrecpparg.h @@ -0,0 +1,174 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat + +#ifndef _PCRECPPARG_H +#define _PCRECPPARG_H + +#include // for NULL +#include + +#include + +namespace pcrecpp { + +class StringPiece; + +// Hex/Octal/Binary? + +// Special class for parsing into objects that define a ParseFrom() method +template +class _RE_MatchObject { + public: + static inline bool Parse(const char* str, int n, void* dest) { + if (dest == NULL) return true; + T* object = reinterpret_cast(dest); + return object->ParseFrom(str, n); + } +}; + +class PCRECPP_EXP_DEFN Arg { + public: + // Empty constructor so we can declare arrays of Arg + Arg(); + + // Constructor specially designed for NULL arguments + Arg(void*); + + typedef bool (*Parser)(const char* str, int n, void* dest); + +// Type-specific parsers +#define PCRE_MAKE_PARSER(type,name) \ + Arg(type* p) : arg_(p), parser_(name) { } \ + Arg(type* p, Parser parser) : arg_(p), parser_(parser) { } + + + PCRE_MAKE_PARSER(char, parse_char); + PCRE_MAKE_PARSER(unsigned char, parse_uchar); + PCRE_MAKE_PARSER(short, parse_short); + PCRE_MAKE_PARSER(unsigned short, parse_ushort); + PCRE_MAKE_PARSER(int, parse_int); + PCRE_MAKE_PARSER(unsigned int, parse_uint); + PCRE_MAKE_PARSER(long, parse_long); + PCRE_MAKE_PARSER(unsigned long, parse_ulong); +#if 1 + PCRE_MAKE_PARSER(long long, parse_longlong); +#endif +#if 1 + PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong); +#endif + PCRE_MAKE_PARSER(float, parse_float); + PCRE_MAKE_PARSER(double, parse_double); + PCRE_MAKE_PARSER(std::string, parse_string); + PCRE_MAKE_PARSER(StringPiece, parse_stringpiece); + +#undef PCRE_MAKE_PARSER + + // Generic constructor + template Arg(T*, Parser parser); + // Generic constructor template + template Arg(T* p) + : arg_(p), parser_(_RE_MatchObject::Parse) { + } + + // Parse the data + bool Parse(const char* str, int n) const; + + private: + void* arg_; + Parser parser_; + + static bool parse_null (const char* str, int n, void* dest); + static bool parse_char (const char* str, int n, void* dest); + static bool parse_uchar (const char* str, int n, void* dest); + static bool parse_float (const char* str, int n, void* dest); + static bool parse_double (const char* str, int n, void* dest); + static bool parse_string (const char* str, int n, void* dest); + static bool parse_stringpiece (const char* str, int n, void* dest); + +#define PCRE_DECLARE_INTEGER_PARSER(name) \ + private: \ + static bool parse_ ## name(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _radix( \ + const char* str, int n, void* dest, int radix); \ + public: \ + static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _cradix(const char* str, int n, void* dest) + + PCRE_DECLARE_INTEGER_PARSER(short); + PCRE_DECLARE_INTEGER_PARSER(ushort); + PCRE_DECLARE_INTEGER_PARSER(int); + PCRE_DECLARE_INTEGER_PARSER(uint); + PCRE_DECLARE_INTEGER_PARSER(long); + PCRE_DECLARE_INTEGER_PARSER(ulong); + PCRE_DECLARE_INTEGER_PARSER(longlong); + PCRE_DECLARE_INTEGER_PARSER(ulonglong); + +#undef PCRE_DECLARE_INTEGER_PARSER +}; + +inline Arg::Arg() : arg_(NULL), parser_(parse_null) { } +inline Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } + +inline bool Arg::Parse(const char* str, int n) const { + return (*parser_)(str, n, arg_); +} + +// This part of the parser, appropriate only for ints, deals with bases +#define MAKE_INTEGER_PARSER(type, name) \ + inline Arg Hex(type* ptr) { \ + return Arg(ptr, Arg::parse_ ## name ## _hex); } \ + inline Arg Octal(type* ptr) { \ + return Arg(ptr, Arg::parse_ ## name ## _octal); } \ + inline Arg CRadix(type* ptr) { \ + return Arg(ptr, Arg::parse_ ## name ## _cradix); } + +MAKE_INTEGER_PARSER(short, short) /* */ +MAKE_INTEGER_PARSER(unsigned short, ushort) /* */ +MAKE_INTEGER_PARSER(int, int) /* Don't use semicolons */ +MAKE_INTEGER_PARSER(unsigned int, uint) /* after these statement */ +MAKE_INTEGER_PARSER(long, long) /* because they can cause */ +MAKE_INTEGER_PARSER(unsigned long, ulong) /* compiler warnings if */ +#if 1 /* the checking level is */ +MAKE_INTEGER_PARSER(long long, longlong) /* turned up high enough. */ +#endif /* */ +#if 1 /* */ +MAKE_INTEGER_PARSER(unsigned long long, ulonglong) /* */ +#endif + +#undef PCRE_IS_SET +#undef PCRE_SET_OR_CLEAR +#undef MAKE_INTEGER_PARSER + +} // namespace pcrecpp + + +#endif /* _PCRECPPARG_H */ diff --git a/third-party/libpcre/include/pcreposix.h b/third-party/libpcre/include/pcreposix.h new file mode 100644 index 00000000..c77c0b05 --- /dev/null +++ b/third-party/libpcre/include/pcreposix.h @@ -0,0 +1,146 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +#ifndef _PCREPOSIX_H +#define _PCREPOSIX_H + +/* This is the header for the POSIX wrapper interface to the PCRE Perl- +Compatible Regular Expression library. It defines the things POSIX says should +be there. I hope. + + Copyright (c) 1997-2012 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* Have to include stdlib.h in order to ensure that size_t is defined. */ + +#include + +/* Allow for C++ users */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Options, mostly defined by POSIX, but with some extras. */ + +#define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */ +#define REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */ +#define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */ +#define REG_NOTEOL 0x0008 /* Maps to PCRE_NOTEOL */ +#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE_DOTALL */ +#define REG_NOSUB 0x0020 /* Maps to PCRE_NO_AUTO_CAPTURE */ +#define REG_UTF8 0x0040 /* NOT defined by POSIX; maps to PCRE_UTF8 */ +#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */ +#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */ +#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */ +#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE_UCP */ + +/* This is not used by PCRE, but by defining it we make it easier +to slot PCRE into existing programs that make POSIX calls. */ + +#define REG_EXTENDED 0 + +/* Error values. Not all these are relevant or used by the wrapper. */ + +enum { + REG_ASSERT = 1, /* internal error ? */ + REG_BADBR, /* invalid repeat counts in {} */ + REG_BADPAT, /* pattern error */ + REG_BADRPT, /* ? * + invalid */ + REG_EBRACE, /* unbalanced {} */ + REG_EBRACK, /* unbalanced [] */ + REG_ECOLLATE, /* collation error - not relevant */ + REG_ECTYPE, /* bad class */ + REG_EESCAPE, /* bad escape sequence */ + REG_EMPTY, /* empty expression */ + REG_EPAREN, /* unbalanced () */ + REG_ERANGE, /* bad range inside [] */ + REG_ESIZE, /* expression too big */ + REG_ESPACE, /* failed to get memory */ + REG_ESUBREG, /* bad back reference */ + REG_INVARG, /* bad argument */ + REG_NOMATCH /* match failed */ +}; + + +/* The structure representing a compiled regular expression. */ + +typedef struct { + void *re_pcre; + size_t re_nsub; + size_t re_erroffset; +} regex_t; + +/* The structure in which a captured offset is returned. */ + +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + +/* When an application links to a PCRE DLL in Windows, the symbols that are +imported have to be identified as such. When building PCRE, the appropriate +export settings are needed, and are set in pcreposix.c before including this +file. */ + +#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL) +# define PCREPOSIX_EXP_DECL extern __declspec(dllimport) +# define PCREPOSIX_EXP_DEFN __declspec(dllimport) +#endif + +/* By default, we use the standard "extern" declarations. */ + +#ifndef PCREPOSIX_EXP_DECL +# ifdef __cplusplus +# define PCREPOSIX_EXP_DECL extern "C" +# define PCREPOSIX_EXP_DEFN extern "C" +# else +# define PCREPOSIX_EXP_DECL extern +# define PCREPOSIX_EXP_DEFN extern +# endif +#endif + +/* The functions */ + +PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int); +PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t, + regmatch_t *, int); +PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t); +PCREPOSIX_EXP_DECL void regfree(regex_t *); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* End of pcreposix.h */ diff --git a/third-party/libpcre/lib/libpcre.1.dylib b/third-party/libpcre/lib/libpcre.1.dylib new file mode 100755 index 00000000..c2b803bc --- /dev/null +++ b/third-party/libpcre/lib/libpcre.1.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e5aa3f2246d84fc74f8ec5fbe15a2b61b8494bba32a2d1d6509853ab93c307 +size 654340 diff --git a/third-party/libpcre/lib/libpcre.a b/third-party/libpcre/lib/libpcre.a new file mode 100644 index 00000000..fb61cef1 --- /dev/null +++ b/third-party/libpcre/lib/libpcre.a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a9cb99558aca3e05e3a143c95afecdd1f8fb83bcd03ef50afc0e0e43fea8c1 +size 721312 diff --git a/third-party/libpcre/lib/libpcre.dylib b/third-party/libpcre/lib/libpcre.dylib new file mode 120000 index 00000000..8bcb5af4 --- /dev/null +++ b/third-party/libpcre/lib/libpcre.dylib @@ -0,0 +1 @@ +libpcre.1.dylib \ No newline at end of file diff --git a/third-party/libpcre/lib/libpcre.la b/third-party/libpcre/lib/libpcre.la new file mode 100755 index 00000000..68f4106d --- /dev/null +++ b/third-party/libpcre/lib/libpcre.la @@ -0,0 +1,41 @@ +# libpcre.la - a libtool library file +# Generated by libtool (GNU libtool) 2.4.6.42-b88ce +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='libpcre.1.dylib' + +# Names of this library. +library_names='libpcre.1.dylib libpcre.dylib' + +# The name of the static archive. +old_library='libpcre.a' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags=' -pthread' + +# Libraries that this one depends upon. +dependency_libs='' + +# Names of additional weak libraries provided by this library +weak_library_names='' + +# Version information for libpcre. +current=3 +age=2 +revision=11 + +# Is this an already installed library? +installed=yes + +# Should we warn about portability when linking against -modules? +shouldnotlink=no + +# Files to dlopen/dlpreopen +dlopen='' +dlpreopen='' + +# Directory that this library needs to be installed in: +libdir='/Users/hat/vimr/.deps/pcre/../../third-party/libpcre/lib' diff --git a/third-party/libpcre/lib/libpcre16.0.dylib b/third-party/libpcre/lib/libpcre16.0.dylib new file mode 100755 index 00000000..65ad684e --- /dev/null +++ b/third-party/libpcre/lib/libpcre16.0.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:551a2bcb404a54be6ef6ab736afcb7627c19811e6b89eb2d82c4c4a0b2748846 +size 592844 diff --git a/third-party/libpcre/lib/libpcre16.a b/third-party/libpcre/lib/libpcre16.a new file mode 100644 index 00000000..791eae01 --- /dev/null +++ b/third-party/libpcre/lib/libpcre16.a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b5562f9a197a1c9dc606b84aafa6d32ea3a3541f8a25b2bef7947c5d936df18 +size 659920 diff --git a/third-party/libpcre/lib/libpcre16.dylib b/third-party/libpcre/lib/libpcre16.dylib new file mode 120000 index 00000000..c1494e90 --- /dev/null +++ b/third-party/libpcre/lib/libpcre16.dylib @@ -0,0 +1 @@ +libpcre16.0.dylib \ No newline at end of file diff --git a/third-party/libpcre/lib/libpcre16.la b/third-party/libpcre/lib/libpcre16.la new file mode 100755 index 00000000..9dae142d --- /dev/null +++ b/third-party/libpcre/lib/libpcre16.la @@ -0,0 +1,41 @@ +# libpcre16.la - a libtool library file +# Generated by libtool (GNU libtool) 2.4.6.42-b88ce +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='libpcre16.0.dylib' + +# Names of this library. +library_names='libpcre16.0.dylib libpcre16.dylib' + +# The name of the static archive. +old_library='libpcre16.a' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags=' -pthread' + +# Libraries that this one depends upon. +dependency_libs='' + +# Names of additional weak libraries provided by this library +weak_library_names='' + +# Version information for libpcre16. +current=2 +age=2 +revision=11 + +# Is this an already installed library? +installed=yes + +# Should we warn about portability when linking against -modules? +shouldnotlink=no + +# Files to dlopen/dlpreopen +dlopen='' +dlpreopen='' + +# Directory that this library needs to be installed in: +libdir='/Users/hat/vimr/.deps/pcre/../../third-party/libpcre/lib' diff --git a/third-party/libpcre/lib/libpcre32.0.dylib b/third-party/libpcre/lib/libpcre32.0.dylib new file mode 100755 index 00000000..90aa0038 --- /dev/null +++ b/third-party/libpcre/lib/libpcre32.0.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e4798fe9c0bb6022859f2cab8aea6281595f93d02d7b9d259b7de8698c008a2 +size 572492 diff --git a/third-party/libpcre/lib/libpcre32.a b/third-party/libpcre/lib/libpcre32.a new file mode 100644 index 00000000..250ec111 --- /dev/null +++ b/third-party/libpcre/lib/libpcre32.a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653499733f520180e6310f57b323c47d7f56921c4a91897afe07b2edc41c2359 +size 639864 diff --git a/third-party/libpcre/lib/libpcre32.dylib b/third-party/libpcre/lib/libpcre32.dylib new file mode 120000 index 00000000..8aa097c0 --- /dev/null +++ b/third-party/libpcre/lib/libpcre32.dylib @@ -0,0 +1 @@ +libpcre32.0.dylib \ No newline at end of file diff --git a/third-party/libpcre/lib/libpcre32.la b/third-party/libpcre/lib/libpcre32.la new file mode 100755 index 00000000..bcd4ddca --- /dev/null +++ b/third-party/libpcre/lib/libpcre32.la @@ -0,0 +1,41 @@ +# libpcre32.la - a libtool library file +# Generated by libtool (GNU libtool) 2.4.6.42-b88ce +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='libpcre32.0.dylib' + +# Names of this library. +library_names='libpcre32.0.dylib libpcre32.dylib' + +# The name of the static archive. +old_library='libpcre32.a' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags=' -pthread' + +# Libraries that this one depends upon. +dependency_libs='' + +# Names of additional weak libraries provided by this library +weak_library_names='' + +# Version information for libpcre32. +current=0 +age=0 +revision=11 + +# Is this an already installed library? +installed=yes + +# Should we warn about portability when linking against -modules? +shouldnotlink=no + +# Files to dlopen/dlpreopen +dlopen='' +dlpreopen='' + +# Directory that this library needs to be installed in: +libdir='/Users/hat/vimr/.deps/pcre/../../third-party/libpcre/lib' diff --git a/third-party/libpcre/lib/libpcrecpp.0.dylib b/third-party/libpcre/lib/libpcrecpp.0.dylib new file mode 100755 index 00000000..80ea7cf8 --- /dev/null +++ b/third-party/libpcre/lib/libpcrecpp.0.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b75c8667fcb3b98434bab78e3ca4ac5fad4a89846038b024a4b88aa2783151 +size 45184 diff --git a/third-party/libpcre/lib/libpcrecpp.a b/third-party/libpcre/lib/libpcrecpp.a new file mode 100644 index 00000000..61100635 --- /dev/null +++ b/third-party/libpcre/lib/libpcrecpp.a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6829c2417ff6297ac32064717bb9de4d21b1492bea318aa3e7cb1427ac9c6ce +size 47336 diff --git a/third-party/libpcre/lib/libpcrecpp.dylib b/third-party/libpcre/lib/libpcrecpp.dylib new file mode 120000 index 00000000..81c5c4ec --- /dev/null +++ b/third-party/libpcre/lib/libpcrecpp.dylib @@ -0,0 +1 @@ +libpcrecpp.0.dylib \ No newline at end of file diff --git a/third-party/libpcre/lib/libpcrecpp.la b/third-party/libpcre/lib/libpcrecpp.la new file mode 100755 index 00000000..dfae4535 --- /dev/null +++ b/third-party/libpcre/lib/libpcrecpp.la @@ -0,0 +1,41 @@ +# libpcrecpp.la - a libtool library file +# Generated by libtool (GNU libtool) 2.4.6.42-b88ce +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='libpcrecpp.0.dylib' + +# Names of this library. +library_names='libpcrecpp.0.dylib libpcrecpp.dylib' + +# The name of the static archive. +old_library='libpcrecpp.a' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags=' -pthread' + +# Libraries that this one depends upon. +dependency_libs=' /Users/hat/vimr/.deps/pcre/../../third-party/libpcre/lib/libpcre.la' + +# Names of additional weak libraries provided by this library +weak_library_names='' + +# Version information for libpcrecpp. +current=0 +age=0 +revision=1 + +# Is this an already installed library? +installed=yes + +# Should we warn about portability when linking against -modules? +shouldnotlink=no + +# Files to dlopen/dlpreopen +dlopen='' +dlpreopen='' + +# Directory that this library needs to be installed in: +libdir='/Users/hat/vimr/.deps/pcre/../../third-party/libpcre/lib' diff --git a/third-party/libpcre/lib/libpcreposix.0.dylib b/third-party/libpcre/lib/libpcreposix.0.dylib new file mode 100755 index 00000000..8df8cc2b --- /dev/null +++ b/third-party/libpcre/lib/libpcreposix.0.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c900d74a340ad53d709b0054b1a0f4b14281d24506b2595aa5ed3299fed634 +size 13208 diff --git a/third-party/libpcre/lib/libpcreposix.a b/third-party/libpcre/lib/libpcreposix.a new file mode 100644 index 00000000..b1efba5e --- /dev/null +++ b/third-party/libpcre/lib/libpcreposix.a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788213a696e22667bcbcfdeef83ad0c6776ce284641ff38c94cbc0ac45c5b846 +size 5336 diff --git a/third-party/libpcre/lib/libpcreposix.dylib b/third-party/libpcre/lib/libpcreposix.dylib new file mode 120000 index 00000000..80b3c25d --- /dev/null +++ b/third-party/libpcre/lib/libpcreposix.dylib @@ -0,0 +1 @@ +libpcreposix.0.dylib \ No newline at end of file diff --git a/third-party/libpcre/lib/libpcreposix.la b/third-party/libpcre/lib/libpcreposix.la new file mode 100755 index 00000000..de9d5cbd --- /dev/null +++ b/third-party/libpcre/lib/libpcreposix.la @@ -0,0 +1,41 @@ +# libpcreposix.la - a libtool library file +# Generated by libtool (GNU libtool) 2.4.6.42-b88ce +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='libpcreposix.0.dylib' + +# Names of this library. +library_names='libpcreposix.0.dylib libpcreposix.dylib' + +# The name of the static archive. +old_library='libpcreposix.a' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags=' -pthread' + +# Libraries that this one depends upon. +dependency_libs=' /Users/hat/vimr/.deps/pcre/../../third-party/libpcre/lib/libpcre.la' + +# Names of additional weak libraries provided by this library +weak_library_names='' + +# Version information for libpcreposix. +current=0 +age=0 +revision=6 + +# Is this an already installed library? +installed=yes + +# Should we warn about portability when linking against -modules? +shouldnotlink=no + +# Files to dlopen/dlpreopen +dlopen='' +dlpreopen='' + +# Directory that this library needs to be installed in: +libdir='/Users/hat/vimr/.deps/pcre/../../third-party/libpcre/lib' diff --git a/third-party/libpcre/lib/pkgconfig/libpcre.pc b/third-party/libpcre/lib/pkgconfig/libpcre.pc new file mode 100644 index 00000000..ed5d5598 --- /dev/null +++ b/third-party/libpcre/lib/pkgconfig/libpcre.pc @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=/Users/hat/vimr/.deps/pcre/../../third-party/libpcre +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +includedir=${prefix}/include + +Name: libpcre +Description: PCRE - Perl compatible regular expressions C library with 8 bit character support +Version: 8.43 +Libs: -L${libdir} -lpcre +Libs.private: -D_THREAD_SAFE -pthread +Cflags: -I${includedir} diff --git a/third-party/libpcre/lib/pkgconfig/libpcre16.pc b/third-party/libpcre/lib/pkgconfig/libpcre16.pc new file mode 100644 index 00000000..644a4725 --- /dev/null +++ b/third-party/libpcre/lib/pkgconfig/libpcre16.pc @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=/Users/hat/vimr/.deps/pcre/../../third-party/libpcre +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +includedir=${prefix}/include + +Name: libpcre16 +Description: PCRE - Perl compatible regular expressions C library with 16 bit character support +Version: 8.43 +Libs: -L${libdir} -lpcre16 +Libs.private: -D_THREAD_SAFE -pthread +Cflags: -I${includedir} diff --git a/third-party/libpcre/lib/pkgconfig/libpcre32.pc b/third-party/libpcre/lib/pkgconfig/libpcre32.pc new file mode 100644 index 00000000..e1ac6fe4 --- /dev/null +++ b/third-party/libpcre/lib/pkgconfig/libpcre32.pc @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=/Users/hat/vimr/.deps/pcre/../../third-party/libpcre +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +includedir=${prefix}/include + +Name: libpcre32 +Description: PCRE - Perl compatible regular expressions C library with 32 bit character support +Version: 8.43 +Libs: -L${libdir} -lpcre32 +Libs.private: -D_THREAD_SAFE -pthread +Cflags: -I${includedir} diff --git a/third-party/libpcre/lib/pkgconfig/libpcrecpp.pc b/third-party/libpcre/lib/pkgconfig/libpcrecpp.pc new file mode 100644 index 00000000..36e476b5 --- /dev/null +++ b/third-party/libpcre/lib/pkgconfig/libpcrecpp.pc @@ -0,0 +1,12 @@ +# Package Information for pkg-config + +prefix=/Users/hat/vimr/.deps/pcre/../../third-party/libpcre +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +includedir=${prefix}/include + +Name: libpcrecpp +Description: PCRECPP - C++ wrapper for PCRE +Version: 8.43 +Libs: -L${libdir} -lpcre -lpcrecpp +Cflags: -I${includedir} diff --git a/third-party/libpcre/lib/pkgconfig/libpcreposix.pc b/third-party/libpcre/lib/pkgconfig/libpcreposix.pc new file mode 100644 index 00000000..8244c51b --- /dev/null +++ b/third-party/libpcre/lib/pkgconfig/libpcreposix.pc @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=/Users/hat/vimr/.deps/pcre/../../third-party/libpcre +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +includedir=${prefix}/include + +Name: libpcreposix +Description: PCREPosix - Posix compatible interface to libpcre +Version: 8.43 +Libs: -L${libdir} -lpcreposix +Cflags: -I${includedir} +Requires.private: libpcre diff --git a/third-party/libxz/include/lzma.h b/third-party/libxz/include/lzma.h new file mode 100644 index 00000000..aa88e424 --- /dev/null +++ b/third-party/libxz/include/lzma.h @@ -0,0 +1,325 @@ +/** + * \file api/lzma.h + * \brief The public API of liblzma data compression library + * + * liblzma is a public domain general-purpose data compression library with + * a zlib-like API. The native file format is .xz, but also the old .lzma + * format and raw (no headers) streams are supported. Multiple compression + * algorithms (filters) are supported. Currently LZMA2 is the primary filter. + * + * liblzma is part of XZ Utils . XZ Utils includes + * a gzip-like command line tool named xz and some other tools. XZ Utils + * is developed and maintained by Lasse Collin. + * + * Major parts of liblzma are based on Igor Pavlov's public domain LZMA SDK + * . + * + * The SHA-256 implementation is based on the public domain code found from + * 7-Zip , which has a modified version of the public + * domain SHA-256 code found from Crypto++ . + * The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef LZMA_H +#define LZMA_H + +/***************************** + * Required standard headers * + *****************************/ + +/* + * liblzma API headers need some standard types and macros. To allow + * including lzma.h without requiring the application to include other + * headers first, lzma.h includes the required standard headers unless + * they already seem to be included already or if LZMA_MANUAL_HEADERS + * has been defined. + * + * Here's what types and macros are needed and from which headers: + * - stddef.h: size_t, NULL + * - stdint.h: uint8_t, uint32_t, uint64_t, UINT32_C(n), uint64_C(n), + * UINT32_MAX, UINT64_MAX + * + * However, inttypes.h is a little more portable than stdint.h, although + * inttypes.h declares some unneeded things compared to plain stdint.h. + * + * The hacks below aren't perfect, specifically they assume that inttypes.h + * exists and that it typedefs at least uint8_t, uint32_t, and uint64_t, + * and that, in case of incomplete inttypes.h, unsigned int is 32-bit. + * If the application already takes care of setting up all the types and + * macros properly (for example by using gnulib's stdint.h or inttypes.h), + * we try to detect that the macros are already defined and don't include + * inttypes.h here again. However, you may define LZMA_MANUAL_HEADERS to + * force this file to never include any system headers. + * + * Some could argue that liblzma API should provide all the required types, + * for example lzma_uint64, LZMA_UINT64_C(n), and LZMA_UINT64_MAX. This was + * seen as an unnecessary mess, since most systems already provide all the + * necessary types and macros in the standard headers. + * + * Note that liblzma API still has lzma_bool, because using stdbool.h would + * break C89 and C++ programs on many systems. sizeof(bool) in C99 isn't + * necessarily the same as sizeof(bool) in C++. + */ + +#ifndef LZMA_MANUAL_HEADERS + /* + * I suppose this works portably also in C++. Note that in C++, + * we need to get size_t into the global namespace. + */ +# include + + /* + * Skip inttypes.h if we already have all the required macros. If we + * have the macros, we assume that we have the matching typedefs too. + */ +# if !defined(UINT32_C) || !defined(UINT64_C) \ + || !defined(UINT32_MAX) || !defined(UINT64_MAX) + /* + * MSVC versions older than 2013 have no C99 support, and + * thus they cannot be used to compile liblzma. Using an + * existing liblzma.dll with old MSVC can work though(*), + * but we need to define the required standard integer + * types here in a MSVC-specific way. + * + * (*) If you do this, the existing liblzma.dll probably uses + * a different runtime library than your MSVC-built + * application. Mixing runtimes is generally bad, but + * in this case it should work as long as you avoid + * the few rarely-needed liblzma functions that allocate + * memory and expect the caller to free it using free(). + */ +# if defined(_WIN32) && defined(_MSC_VER) && _MSC_VER < 1800 + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else + /* Use the standard inttypes.h. */ +# ifdef __cplusplus + /* + * C99 sections 7.18.2 and 7.18.4 specify + * that C++ implementations define the limit + * and constant macros only if specifically + * requested. Note that if you want the + * format macros (PRIu64 etc.) too, you need + * to define __STDC_FORMAT_MACROS before + * including lzma.h, since re-including + * inttypes.h with __STDC_FORMAT_MACROS + * defined doesn't necessarily work. + */ +# ifndef __STDC_LIMIT_MACROS +# define __STDC_LIMIT_MACROS 1 +# endif +# ifndef __STDC_CONSTANT_MACROS +# define __STDC_CONSTANT_MACROS 1 +# endif +# endif + +# include +# endif + + /* + * Some old systems have only the typedefs in inttypes.h, and + * lack all the macros. For those systems, we need a few more + * hacks. We assume that unsigned int is 32-bit and unsigned + * long is either 32-bit or 64-bit. If these hacks aren't + * enough, the application has to setup the types manually + * before including lzma.h. + */ +# ifndef UINT32_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT32_C(n) n ## UI32 +# else +# define UINT32_C(n) n ## U +# endif +# endif + +# ifndef UINT64_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT64_C(n) n ## UI64 +# else + /* Get ULONG_MAX. */ +# include +# if ULONG_MAX == 4294967295UL +# define UINT64_C(n) n ## ULL +# else +# define UINT64_C(n) n ## UL +# endif +# endif +# endif + +# ifndef UINT32_MAX +# define UINT32_MAX (UINT32_C(4294967295)) +# endif + +# ifndef UINT64_MAX +# define UINT64_MAX (UINT64_C(18446744073709551615)) +# endif +# endif +#endif /* ifdef LZMA_MANUAL_HEADERS */ + + +/****************** + * LZMA_API macro * + ******************/ + +/* + * Some systems require that the functions and function pointers are + * declared specially in the headers. LZMA_API_IMPORT is for importing + * symbols and LZMA_API_CALL is to specify the calling convention. + * + * By default it is assumed that the application will link dynamically + * against liblzma. #define LZMA_API_STATIC in your application if you + * want to link against static liblzma. If you don't care about portability + * to operating systems like Windows, or at least don't care about linking + * against static liblzma on them, don't worry about LZMA_API_STATIC. That + * is, most developers will never need to use LZMA_API_STATIC. + * + * The GCC variants are a special case on Windows (Cygwin and MinGW). + * We rely on GCC doing the right thing with its auto-import feature, + * and thus don't use __declspec(dllimport). This way developers don't + * need to worry about LZMA_API_STATIC. Also the calling convention is + * omitted on Cygwin but not on MinGW. + */ +#ifndef LZMA_API_IMPORT +# if !defined(LZMA_API_STATIC) && defined(_WIN32) && !defined(__GNUC__) +# define LZMA_API_IMPORT __declspec(dllimport) +# else +# define LZMA_API_IMPORT +# endif +#endif + +#ifndef LZMA_API_CALL +# if defined(_WIN32) && !defined(__CYGWIN__) +# define LZMA_API_CALL __cdecl +# else +# define LZMA_API_CALL +# endif +#endif + +#ifndef LZMA_API +# define LZMA_API(type) LZMA_API_IMPORT type LZMA_API_CALL +#endif + + +/*********** + * nothrow * + ***********/ + +/* + * None of the functions in liblzma may throw an exception. Even + * the functions that use callback functions won't throw exceptions, + * because liblzma would break if a callback function threw an exception. + */ +#ifndef lzma_nothrow +# if defined(__cplusplus) +# if __cplusplus >= 201103L +# define lzma_nothrow noexcept +# else +# define lzma_nothrow throw() +# endif +# elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) +# define lzma_nothrow __attribute__((__nothrow__)) +# else +# define lzma_nothrow +# endif +#endif + + +/******************** + * GNU C extensions * + ********************/ + +/* + * GNU C extensions are used conditionally in the public API. It doesn't + * break anything if these are sometimes enabled and sometimes not, only + * affects warnings and optimizations. + */ +#if __GNUC__ >= 3 +# ifndef lzma_attribute +# define lzma_attribute(attr) __attribute__(attr) +# endif + + /* warn_unused_result was added in GCC 3.4. */ +# ifndef lzma_attr_warn_unused_result +# if __GNUC__ == 3 && __GNUC_MINOR__ < 4 +# define lzma_attr_warn_unused_result +# endif +# endif + +#else +# ifndef lzma_attribute +# define lzma_attribute(attr) +# endif +#endif + + +#ifndef lzma_attr_pure +# define lzma_attr_pure lzma_attribute((__pure__)) +#endif + +#ifndef lzma_attr_const +# define lzma_attr_const lzma_attribute((__const__)) +#endif + +#ifndef lzma_attr_warn_unused_result +# define lzma_attr_warn_unused_result \ + lzma_attribute((__warn_unused_result__)) +#endif + + +/************** + * Subheaders * + **************/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Subheaders check that this is defined. It is to prevent including + * them directly from applications. + */ +#define LZMA_H_INTERNAL 1 + +/* Basic features */ +#include "lzma/version.h" +#include "lzma/base.h" +#include "lzma/vli.h" +#include "lzma/check.h" + +/* Filters */ +#include "lzma/filter.h" +#include "lzma/bcj.h" +#include "lzma/delta.h" +#include "lzma/lzma12.h" + +/* Container formats */ +#include "lzma/container.h" + +/* Advanced features */ +#include "lzma/stream_flags.h" +#include "lzma/block.h" +#include "lzma/index.h" +#include "lzma/index_hash.h" + +/* Hardware information */ +#include "lzma/hardware.h" + +/* + * All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications + * re-including the subheaders. + */ +#undef LZMA_H_INTERNAL + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef LZMA_H */ diff --git a/third-party/libxz/include/lzma/base.h b/third-party/libxz/include/lzma/base.h new file mode 100644 index 00000000..a6005acc --- /dev/null +++ b/third-party/libxz/include/lzma/base.h @@ -0,0 +1,659 @@ +/** + * \file lzma/base.h + * \brief Data types and functions used in many places in liblzma API + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Boolean + * + * This is here because C89 doesn't have stdbool.h. To set a value for + * variables having type lzma_bool, you can use + * - C99's `true' and `false' from stdbool.h; + * - C++'s internal `true' and `false'; or + * - integers one (true) and zero (false). + */ +typedef unsigned char lzma_bool; + + +/** + * \brief Type of reserved enumeration variable in structures + * + * To avoid breaking library ABI when new features are added, several + * structures contain extra variables that may be used in future. Since + * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may + * even vary depending on the range of enumeration constants, we specify + * a separate type to be used for reserved enumeration variables. All + * enumeration constants in liblzma API will be non-negative and less + * than 128, which should guarantee that the ABI won't break even when + * new constants are added to existing enumerations. + */ +typedef enum { + LZMA_RESERVED_ENUM = 0 +} lzma_reserved_enum; + + +/** + * \brief Return values used by several functions in liblzma + * + * Check the descriptions of specific functions to find out which return + * values they can return. With some functions the return values may have + * more specific meanings than described here; those differences are + * described per-function basis. + */ +typedef enum { + LZMA_OK = 0, + /**< + * \brief Operation completed successfully + */ + + LZMA_STREAM_END = 1, + /**< + * \brief End of stream was reached + * + * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or + * LZMA_FINISH was finished. In decoder, this indicates + * that all the data was successfully decoded. + * + * In all cases, when LZMA_STREAM_END is returned, the last + * output bytes should be picked from strm->next_out. + */ + + LZMA_NO_CHECK = 2, + /**< + * \brief Input stream has no integrity check + * + * This return value can be returned only if the + * LZMA_TELL_NO_CHECK flag was used when initializing + * the decoder. LZMA_NO_CHECK is just a warning, and + * the decoding can be continued normally. + * + * It is possible to call lzma_get_check() immediately after + * lzma_code has returned LZMA_NO_CHECK. The result will + * naturally be LZMA_CHECK_NONE, but the possibility to call + * lzma_get_check() may be convenient in some applications. + */ + + LZMA_UNSUPPORTED_CHECK = 3, + /**< + * \brief Cannot calculate the integrity check + * + * The usage of this return value is different in encoders + * and decoders. + * + * Encoders can return this value only from the initialization + * function. If initialization fails with this value, the + * encoding cannot be done, because there's no way to produce + * output with the correct integrity check. + * + * Decoders can return this value only from lzma_code() and + * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when + * initializing the decoder. The decoding can still be + * continued normally even if the check type is unsupported, + * but naturally the check will not be validated, and possible + * errors may go undetected. + * + * With decoder, it is possible to call lzma_get_check() + * immediately after lzma_code() has returned + * LZMA_UNSUPPORTED_CHECK. This way it is possible to find + * out what the unsupported Check ID was. + */ + + LZMA_GET_CHECK = 4, + /**< + * \brief Integrity check type is now available + * + * This value can be returned only by the lzma_code() function + * and only if the decoder was initialized with the + * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the + * application that it may now call lzma_get_check() to find + * out the Check ID. This can be used, for example, to + * implement a decoder that accepts only files that have + * strong enough integrity check. + */ + + LZMA_MEM_ERROR = 5, + /**< + * \brief Cannot allocate memory + * + * Memory allocation failed, or the size of the allocation + * would be greater than SIZE_MAX. + * + * Due to internal implementation reasons, the coding cannot + * be continued even if more memory were made available after + * LZMA_MEM_ERROR. + */ + + LZMA_MEMLIMIT_ERROR = 6, + /** + * \brief Memory usage limit was reached + * + * Decoder would need more memory than allowed by the + * specified memory usage limit. To continue decoding, + * the memory usage limit has to be increased with + * lzma_memlimit_set(). + */ + + LZMA_FORMAT_ERROR = 7, + /**< + * \brief File format not recognized + * + * The decoder did not recognize the input as supported file + * format. This error can occur, for example, when trying to + * decode .lzma format file with lzma_stream_decoder, + * because lzma_stream_decoder accepts only the .xz format. + */ + + LZMA_OPTIONS_ERROR = 8, + /**< + * \brief Invalid or unsupported options + * + * Invalid or unsupported options, for example + * - unsupported filter(s) or filter options; or + * - reserved bits set in headers (decoder only). + * + * Rebuilding liblzma with more features enabled, or + * upgrading to a newer version of liblzma may help. + */ + + LZMA_DATA_ERROR = 9, + /**< + * \brief Data is corrupt + * + * The usage of this return value is different in encoders + * and decoders. In both encoder and decoder, the coding + * cannot continue after this error. + * + * Encoders return this if size limits of the target file + * format would be exceeded. These limits are huge, thus + * getting this error from an encoder is mostly theoretical. + * For example, the maximum compressed and uncompressed + * size of a .xz Stream is roughly 8 EiB (2^63 bytes). + * + * Decoders return this error if the input data is corrupt. + * This can mean, for example, invalid CRC32 in headers + * or invalid check of uncompressed data. + */ + + LZMA_BUF_ERROR = 10, + /**< + * \brief No progress is possible + * + * This error code is returned when the coder cannot consume + * any new input and produce any new output. The most common + * reason for this error is that the input stream being + * decoded is truncated or corrupt. + * + * This error is not fatal. Coding can be continued normally + * by providing more input and/or more output space, if + * possible. + * + * Typically the first call to lzma_code() that can do no + * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only + * the second consecutive call doing no progress will return + * LZMA_BUF_ERROR. This is intentional. + * + * With zlib, Z_BUF_ERROR may be returned even if the + * application is doing nothing wrong, so apps will need + * to handle Z_BUF_ERROR specially. The above hack + * guarantees that liblzma never returns LZMA_BUF_ERROR + * to properly written applications unless the input file + * is truncated or corrupt. This should simplify the + * applications a little. + */ + + LZMA_PROG_ERROR = 11, + /**< + * \brief Programming error + * + * This indicates that the arguments given to the function are + * invalid or the internal state of the decoder is corrupt. + * - Function arguments are invalid or the structures + * pointed by the argument pointers are invalid + * e.g. if strm->next_out has been set to NULL and + * strm->avail_out > 0 when calling lzma_code(). + * - lzma_* functions have been called in wrong order + * e.g. lzma_code() was called right after lzma_end(). + * - If errors occur randomly, the reason might be flaky + * hardware. + * + * If you think that your code is correct, this error code + * can be a sign of a bug in liblzma. See the documentation + * how to report bugs. + */ +} lzma_ret; + + +/** + * \brief The `action' argument for lzma_code() + * + * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, LZMA_FULL_BARRIER, + * or LZMA_FINISH, the same `action' must is used until lzma_code() returns + * LZMA_STREAM_END. Also, the amount of input (that is, strm->avail_in) must + * not be modified by the application until lzma_code() returns + * LZMA_STREAM_END. Changing the `action' or modifying the amount of input + * will make lzma_code() return LZMA_PROG_ERROR. + */ +typedef enum { + LZMA_RUN = 0, + /**< + * \brief Continue coding + * + * Encoder: Encode as much input as possible. Some internal + * buffering will probably be done (depends on the filter + * chain in use), which causes latency: the input used won't + * usually be decodeable from the output of the same + * lzma_code() call. + * + * Decoder: Decode as much input as possible and produce as + * much output as possible. + */ + + LZMA_SYNC_FLUSH = 1, + /**< + * \brief Make all the input available at output + * + * Normally the encoder introduces some latency. + * LZMA_SYNC_FLUSH forces all the buffered data to be + * available at output without resetting the internal + * state of the encoder. This way it is possible to use + * compressed stream for example for communication over + * network. + * + * Only some filters support LZMA_SYNC_FLUSH. Trying to use + * LZMA_SYNC_FLUSH with filters that don't support it will + * make lzma_code() return LZMA_OPTIONS_ERROR. For example, + * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does. + * + * Using LZMA_SYNC_FLUSH very often can dramatically reduce + * the compression ratio. With some filters (for example, + * LZMA2), fine-tuning the compression options may help + * mitigate this problem significantly (for example, + * match finder with LZMA2). + * + * Decoders don't support LZMA_SYNC_FLUSH. + */ + + LZMA_FULL_FLUSH = 2, + /**< + * \brief Finish encoding of the current Block + * + * All the input data going to the current Block must have + * been given to the encoder (the last bytes can still be + * pending in *next_in). Call lzma_code() with LZMA_FULL_FLUSH + * until it returns LZMA_STREAM_END. Then continue normally + * with LZMA_RUN or finish the Stream with LZMA_FINISH. + * + * This action is currently supported only by Stream encoder + * and easy encoder (which uses Stream encoder). If there is + * no unfinished Block, no empty Block is created. + */ + + LZMA_FULL_BARRIER = 4, + /**< + * \brief Finish encoding of the current Block + * + * This is like LZMA_FULL_FLUSH except that this doesn't + * necessarily wait until all the input has been made + * available via the output buffer. That is, lzma_code() + * might return LZMA_STREAM_END as soon as all the input + * has been consumed (avail_in == 0). + * + * LZMA_FULL_BARRIER is useful with a threaded encoder if + * one wants to split the .xz Stream into Blocks at specific + * offsets but doesn't care if the output isn't flushed + * immediately. Using LZMA_FULL_BARRIER allows keeping + * the threads busy while LZMA_FULL_FLUSH would make + * lzma_code() wait until all the threads have finished + * until more data could be passed to the encoder. + * + * With a lzma_stream initialized with the single-threaded + * lzma_stream_encoder() or lzma_easy_encoder(), + * LZMA_FULL_BARRIER is an alias for LZMA_FULL_FLUSH. + */ + + LZMA_FINISH = 3 + /**< + * \brief Finish the coding operation + * + * All the input data must have been given to the encoder + * (the last bytes can still be pending in next_in). + * Call lzma_code() with LZMA_FINISH until it returns + * LZMA_STREAM_END. Once LZMA_FINISH has been used, + * the amount of input must no longer be changed by + * the application. + * + * When decoding, using LZMA_FINISH is optional unless the + * LZMA_CONCATENATED flag was used when the decoder was + * initialized. When LZMA_CONCATENATED was not used, the only + * effect of LZMA_FINISH is that the amount of input must not + * be changed just like in the encoder. + */ +} lzma_action; + + +/** + * \brief Custom functions for memory handling + * + * A pointer to lzma_allocator may be passed via lzma_stream structure + * to liblzma, and some advanced functions take a pointer to lzma_allocator + * as a separate function argument. The library will use the functions + * specified in lzma_allocator for memory handling instead of the default + * malloc() and free(). C++ users should note that the custom memory + * handling functions must not throw exceptions. + * + * Single-threaded mode only: liblzma doesn't make an internal copy of + * lzma_allocator. Thus, it is OK to change these function pointers in + * the middle of the coding process, but obviously it must be done + * carefully to make sure that the replacement `free' can deallocate + * memory allocated by the earlier `alloc' function(s). + * + * Multithreaded mode: liblzma might internally store pointers to the + * lzma_allocator given via the lzma_stream structure. The application + * must not change the allocator pointer in lzma_stream or the contents + * of the pointed lzma_allocator structure until lzma_end() has been used + * to free the memory associated with that lzma_stream. The allocation + * functions might be called simultaneously from multiple threads, and + * thus they must be thread safe. + */ +typedef struct { + /** + * \brief Pointer to a custom memory allocation function + * + * If you don't want a custom allocator, but still want + * custom free(), set this to NULL and liblzma will use + * the standard malloc(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param nmemb Number of elements like in calloc(). liblzma + * will always set nmemb to 1, so it is safe to + * ignore nmemb in a custom allocator if you like. + * The nmemb argument exists only for + * compatibility with zlib and libbzip2. + * \param size Size of an element in bytes. + * liblzma never sets this to zero. + * + * \return Pointer to the beginning of a memory block of + * `size' bytes, or NULL if allocation fails + * for some reason. When allocation fails, functions + * of liblzma return LZMA_MEM_ERROR. + * + * The allocator should not waste time zeroing the allocated buffers. + * This is not only about speed, but also memory usage, since the + * operating system kernel doesn't necessarily allocate the requested + * memory in physical memory until it is actually used. With small + * input files, liblzma may actually need only a fraction of the + * memory that it requested for allocation. + * + * \note LZMA_MEM_ERROR is also used when the size of the + * allocation would be greater than SIZE_MAX. Thus, + * don't assume that the custom allocator must have + * returned NULL if some function from liblzma + * returns LZMA_MEM_ERROR. + */ + void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size); + + /** + * \brief Pointer to a custom memory freeing function + * + * If you don't want a custom freeing function, but still + * want a custom allocator, set this to NULL and liblzma + * will use the standard free(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param ptr Pointer returned by lzma_allocator.alloc(), + * or when it is set to NULL, a pointer returned + * by the standard malloc(). + */ + void (LZMA_API_CALL *free)(void *opaque, void *ptr); + + /** + * \brief Pointer passed to .alloc() and .free() + * + * opaque is passed as the first argument to lzma_allocator.alloc() + * and lzma_allocator.free(). This intended to ease implementing + * custom memory allocation functions for use with liblzma. + * + * If you don't need this, you should set this to NULL. + */ + void *opaque; + +} lzma_allocator; + + +/** + * \brief Internal data structure + * + * The contents of this structure is not visible outside the library. + */ +typedef struct lzma_internal_s lzma_internal; + + +/** + * \brief Passing data to and from liblzma + * + * The lzma_stream structure is used for + * - passing pointers to input and output buffers to liblzma; + * - defining custom memory hander functions; and + * - holding a pointer to coder-specific internal data structures. + * + * Typical usage: + * + * - After allocating lzma_stream (on stack or with malloc()), it must be + * initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details). + * + * - Initialize a coder to the lzma_stream, for example by using + * lzma_easy_encoder() or lzma_auto_decoder(). Some notes: + * - In contrast to zlib, strm->next_in and strm->next_out are + * ignored by all initialization functions, thus it is safe + * to not initialize them yet. + * - The initialization functions always set strm->total_in and + * strm->total_out to zero. + * - If the initialization function fails, no memory is left allocated + * that would require freeing with lzma_end() even if some memory was + * associated with the lzma_stream structure when the initialization + * function was called. + * + * - Use lzma_code() to do the actual work. + * + * - Once the coding has been finished, the existing lzma_stream can be + * reused. It is OK to reuse lzma_stream with different initialization + * function without calling lzma_end() first. Old allocations are + * automatically freed. + * + * - Finally, use lzma_end() to free the allocated memory. lzma_end() never + * frees the lzma_stream structure itself. + * + * Application may modify the values of total_in and total_out as it wants. + * They are updated by liblzma to match the amount of data read and + * written but aren't used for anything else except as a possible return + * values from lzma_get_progress(). + */ +typedef struct { + const uint8_t *next_in; /**< Pointer to the next input byte. */ + size_t avail_in; /**< Number of available input bytes in next_in. */ + uint64_t total_in; /**< Total number of bytes read by liblzma. */ + + uint8_t *next_out; /**< Pointer to the next output position. */ + size_t avail_out; /**< Amount of free space in next_out. */ + uint64_t total_out; /**< Total number of bytes written by liblzma. */ + + /** + * \brief Custom memory allocation functions + * + * In most cases this is NULL which makes liblzma use + * the standard malloc() and free(). + * + * \note In 5.0.x this is not a const pointer. + */ + const lzma_allocator *allocator; + + /** Internal state is not visible to applications. */ + lzma_internal *internal; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. Excluding the initialization of this structure, + * you should not touch these, because the names of these variables + * may change. + */ + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + void *reserved_ptr4; + uint64_t reserved_int1; + uint64_t reserved_int2; + size_t reserved_int3; + size_t reserved_int4; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + +} lzma_stream; + + +/** + * \brief Initialization for lzma_stream + * + * When you declare an instance of lzma_stream, you can immediately + * initialize it so that initialization functions know that no memory + * has been allocated yet: + * + * lzma_stream strm = LZMA_STREAM_INIT; + * + * If you need to initialize a dynamically allocated lzma_stream, you can use + * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this + * violates the C standard since NULL may have different internal + * representation than zero, but it should be portable enough in practice. + * Anyway, for maximum portability, you can use something like this: + * + * lzma_stream tmp = LZMA_STREAM_INIT; + * *strm = tmp; + */ +#define LZMA_STREAM_INIT \ + { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ + NULL, NULL, NULL, NULL, 0, 0, 0, 0, \ + LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } + + +/** + * \brief Encode or decode data + * + * Once the lzma_stream has been successfully initialized (e.g. with + * lzma_stream_encoder()), the actual encoding or decoding is done + * using this function. The application has to update strm->next_in, + * strm->avail_in, strm->next_out, and strm->avail_out to pass input + * to and get output from liblzma. + * + * See the description of the coder-specific initialization function to find + * out what `action' values are supported by the coder. + */ +extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Free memory allocated for the coder data structures + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * + * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other + * members of the lzma_stream structure are touched. + * + * \note zlib indicates an error if application end()s unfinished + * stream structure. liblzma doesn't do this, and assumes that + * application knows what it is doing. + */ +extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow; + + +/** + * \brief Get progress information + * + * In single-threaded mode, applications can get progress information from + * strm->total_in and strm->total_out. In multi-threaded mode this is less + * useful because a significant amount of both input and output data gets + * buffered internally by liblzma. This makes total_in and total_out give + * misleading information and also makes the progress indicator updates + * non-smooth. + * + * This function gives realistic progress information also in multi-threaded + * mode by taking into account the progress made by each thread. In + * single-threaded mode *progress_in and *progress_out are set to + * strm->total_in and strm->total_out, respectively. + */ +extern LZMA_API(void) lzma_get_progress(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow; + + +/** + * \brief Get the memory usage of decoder filter chain + * + * This function is currently supported only when *strm has been initialized + * with a function that takes a memlimit argument. With other functions, you + * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage() + * to estimate the memory requirements. + * + * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big + * the memory usage limit should have been to decode the input. Note that + * this may give misleading information if decoding .xz Streams that have + * multiple Blocks, because each Block can have different memory requirements. + * + * \return How much memory is currently allocated for the filter + * decoders. If no filter chain is currently allocated, + * some non-zero value is still returned, which is less than + * or equal to what any filter chain would indicate as its + * memory requirement. + * + * If this function isn't supported by *strm or some other error + * occurs, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the current memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \return On success, the current memory usage limit is returned + * (always non-zero). On error, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * liblzma 5.2.3 and earlier has a bug where memlimit value of 0 causes + * this function to do nothing (leaving the limit unchanged) and still + * return LZMA_OK. Later versions treat 0 as if 1 had been specified (so + * lzma_memlimit_get() will return 1 even if you specify 0 here). + * + * \return - LZMA_OK: New memory usage limit successfully set. + * - LZMA_MEMLIMIT_ERROR: The new limit is too small. + * The limit was not changed. + * - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't + * support memory usage limit. + */ +extern LZMA_API(lzma_ret) lzma_memlimit_set( + lzma_stream *strm, uint64_t memlimit) lzma_nothrow; diff --git a/third-party/libxz/include/lzma/bcj.h b/third-party/libxz/include/lzma/bcj.h new file mode 100644 index 00000000..8e37538a --- /dev/null +++ b/third-party/libxz/include/lzma/bcj.h @@ -0,0 +1,90 @@ +/** + * \file lzma/bcj.h + * \brief Branch/Call/Jump conversion filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* Filter IDs for lzma_filter.id */ + +#define LZMA_FILTER_X86 LZMA_VLI_C(0x04) + /**< + * Filter for x86 binaries + */ + +#define LZMA_FILTER_POWERPC LZMA_VLI_C(0x05) + /**< + * Filter for Big endian PowerPC binaries + */ + +#define LZMA_FILTER_IA64 LZMA_VLI_C(0x06) + /**< + * Filter for IA-64 (Itanium) binaries. + */ + +#define LZMA_FILTER_ARM LZMA_VLI_C(0x07) + /**< + * Filter for ARM binaries. + */ + +#define LZMA_FILTER_ARMTHUMB LZMA_VLI_C(0x08) + /**< + * Filter for ARM-Thumb binaries. + */ + +#define LZMA_FILTER_SPARC LZMA_VLI_C(0x09) + /**< + * Filter for SPARC binaries. + */ + + +/** + * \brief Options for BCJ filters + * + * The BCJ filters never change the size of the data. Specifying options + * for them is optional: if pointer to options is NULL, default value is + * used. You probably never need to specify options to BCJ filters, so just + * set the options pointer to NULL and be happy. + * + * If options with non-default values have been specified when encoding, + * the same options must also be specified when decoding. + * + * \note At the moment, none of the BCJ filters support + * LZMA_SYNC_FLUSH. If LZMA_SYNC_FLUSH is specified, + * LZMA_OPTIONS_ERROR will be returned. If there is need, + * partial support for LZMA_SYNC_FLUSH can be added in future. + * Partial means that flushing would be possible only at + * offsets that are multiple of 2, 4, or 16 depending on + * the filter, except x86 which cannot be made to support + * LZMA_SYNC_FLUSH predictably. + */ +typedef struct { + /** + * \brief Start offset for conversions + * + * This setting is useful only when the same filter is used + * _separately_ for multiple sections of the same executable file, + * and the sections contain cross-section branch/call/jump + * instructions. In that case it is beneficial to set the start + * offset of the non-first sections so that the relative addresses + * of the cross-section branch/call/jump instructions will use the + * same absolute addresses as in the first section. + * + * When the pointer to options is NULL, the default value (zero) + * is used. + */ + uint32_t start_offset; + +} lzma_options_bcj; diff --git a/third-party/libxz/include/lzma/block.h b/third-party/libxz/include/lzma/block.h new file mode 100644 index 00000000..7bdcfd7c --- /dev/null +++ b/third-party/libxz/include/lzma/block.h @@ -0,0 +1,581 @@ +/** + * \file lzma/block.h + * \brief .xz Block handling + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Options for the Block and Block Header encoders and decoders + * + * Different Block handling functions use different parts of this structure. + * Some read some members, other functions write, and some do both. Only the + * members listed for reading need to be initialized when the specified + * functions are called. The members marked for writing will be assigned + * new values at some point either by calling the given function or by + * later calls to lzma_code(). + */ +typedef struct { + /** + * \brief Block format version + * + * To prevent API and ABI breakages when new features are needed, + * a version number is used to indicate which fields in this + * structure are in use: + * - liblzma >= 5.0.0: version = 0 is supported. + * - liblzma >= 5.1.4beta: Support for version = 1 was added, + * which adds the ignore_check field. + * + * If version is greater than one, most Block related functions + * will return LZMA_OPTIONS_ERROR (lzma_block_header_decode() works + * with any version value). + * + * Read by: + * - All functions that take pointer to lzma_block as argument, + * including lzma_block_header_decode(). + * + * Written by: + * - lzma_block_header_decode() + */ + uint32_t version; + + /** + * \brief Size of the Block Header field + * + * This is always a multiple of four. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_size() + * - lzma_block_buffer_encode() + */ + uint32_t header_size; +# define LZMA_BLOCK_HEADER_SIZE_MIN 8 +# define LZMA_BLOCK_HEADER_SIZE_MAX 1024 + + /** + * \brief Type of integrity Check + * + * The Check ID is not stored into the Block Header, thus its value + * must be provided also when decoding. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_check check; + + /** + * \brief Size of the Compressed Data in bytes + * + * Encoding: If this is not LZMA_VLI_UNKNOWN, Block Header encoder + * will store this value to the Block Header. Block encoder doesn't + * care about this value, but will set it once the encoding has been + * finished. + * + * Decoding: If this is not LZMA_VLI_UNKNOWN, Block decoder will + * verify that the size of the Compressed Data field matches + * compressed_size. + * + * Usually you don't know this value when encoding in streamed mode, + * and thus cannot write this field into the Block Header. + * + * In non-streamed mode you can reserve space for this field before + * encoding the actual Block. After encoding the data, finish the + * Block by encoding the Block Header. Steps in detail: + * + * - Set compressed_size to some big enough value. If you don't know + * better, use LZMA_VLI_MAX, but remember that bigger values take + * more space in Block Header. + * + * - Call lzma_block_header_size() to see how much space you need to + * reserve for the Block Header. + * + * - Encode the Block using lzma_block_encoder() and lzma_code(). + * It sets compressed_size to the correct value. + * + * - Use lzma_block_header_encode() to encode the Block Header. + * Because space was reserved in the first step, you don't need + * to call lzma_block_header_size() anymore, because due to + * reserving, header_size has to be big enough. If it is "too big", + * lzma_block_header_encode() will add enough Header Padding to + * make Block Header to match the size specified by header_size. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed Size in bytes + * + * This is handled very similarly to compressed_size above. + * + * uncompressed_size is needed by fewer functions than + * compressed_size. This is because uncompressed_size isn't + * needed to validate that Block stays within proper limits. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli uncompressed_size; + + /** + * \brief Array of filters + * + * There can be 1-4 filters. The end of the array is marked with + * .id = LZMA_VLI_UNKNOWN. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode(): Note that this does NOT free() + * the old filter options structures. All unused filters[] will + * have .id == LZMA_VLI_UNKNOWN and .options == NULL. If + * decoding fails, all filters[] are guaranteed to be + * LZMA_VLI_UNKNOWN and NULL. + * + * \note Because of the array is terminated with + * .id = LZMA_VLI_UNKNOWN, the actual array must + * have LZMA_FILTERS_MAX + 1 members or the Block + * Header decoder will overflow the buffer. + */ + lzma_filter *filters; + + /** + * \brief Raw value stored in the Check field + * + * After successful coding, the first lzma_check_size(check) bytes + * of this array contain the raw value stored in the Check field. + * + * Note that CRC32 and CRC64 are stored in little endian byte order. + * Take it into account if you display the Check values to the user. + * + * Written by: + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + uint8_t raw_check[LZMA_CHECK_SIZE_MAX]; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + uint32_t reserved_int1; + uint32_t reserved_int2; + lzma_vli reserved_int3; + lzma_vli reserved_int4; + lzma_vli reserved_int5; + lzma_vli reserved_int6; + lzma_vli reserved_int7; + lzma_vli reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + + /** + * \brief A flag to Block decoder to not verify the Check field + * + * This field is supported by liblzma >= 5.1.4beta if .version >= 1. + * + * If this is set to true, the integrity check won't be calculated + * and verified. Unless you know what you are doing, you should + * leave this to false. (A reason to set this to true is when the + * file integrity is verified externally anyway and you want to + * speed up the decompression, which matters mostly when using + * SHA-256 as the integrity check.) + * + * If .version >= 1, read by: + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by (.version is ignored): + * - lzma_block_header_decode() always sets this to false + */ + lzma_bool ignore_check; + + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + +} lzma_block; + + +/** + * \brief Decode the Block Header Size field + * + * To decode Block Header using lzma_block_header_decode(), the size of the + * Block Header has to be known and stored into lzma_block.header_size. + * The size can be calculated from the first byte of a Block using this macro. + * Note that if the first byte is 0x00, it indicates beginning of Index; use + * this macro only when the byte is not 0x00. + * + * There is no encoding macro, because Block Header encoder is enough for that. + */ +#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) + + +/** + * \brief Calculate Block Header Size + * + * Calculate the minimum size needed for the Block Header field using the + * settings specified in the lzma_block structure. Note that it is OK to + * increase the calculated header_size value as long as it is a multiple of + * four and doesn't exceed LZMA_BLOCK_HEADER_SIZE_MAX. Increasing header_size + * just means that lzma_block_header_encode() will add Header Padding. + * + * \return - LZMA_OK: Size calculated successfully and stored to + * block->header_size. + * - LZMA_OPTIONS_ERROR: Unsupported version, filters or + * filter options. + * - LZMA_PROG_ERROR: Invalid values like compressed_size == 0. + * + * \note This doesn't check that all the options are valid i.e. this + * may return LZMA_OK even if lzma_block_header_encode() or + * lzma_block_encoder() would fail. If you want to validate the + * filter chain, consider using lzma_memlimit_encoder() which as + * a side-effect validates the filter chain. + */ +extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Block Header + * + * The caller must have calculated the size of the Block Header already with + * lzma_block_header_size(). If a value larger than the one calculated by + * lzma_block_header_size() is used, the Block Header will be padded to the + * specified size. + * + * \param out Beginning of the output buffer. This must be + * at least block->header_size bytes. + * \param block Block options to be encoded. + * + * \return - LZMA_OK: Encoding was successful. block->header_size + * bytes were written to output buffer. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_encode( + const lzma_block *block, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Block Header + * + * block->version should (usually) be set to the highest value supported + * by the application. If the application sets block->version to a value + * higher than supported by the current liblzma version, this function will + * downgrade block->version to the highest value supported by it. Thus one + * should check the value of block->version after calling this function if + * block->version was set to a non-zero value and the application doesn't + * otherwise know that the liblzma version being used is new enough to + * support the specified block->version. + * + * The size of the Block Header must have already been decoded with + * lzma_block_header_size_decode() macro and stored to block->header_size. + * + * The integrity check type from Stream Header must have been stored + * to block->check. + * + * block->filters must have been allocated, but they don't need to be + * initialized (possible existing filter options are not freed). + * + * \param block Destination for Block options. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() (and also free() + * if an error occurs). + * \param in Beginning of the input buffer. This must be + * at least block->header_size bytes. + * + * \return - LZMA_OK: Decoding was successful. block->header_size + * bytes were read from the input buffer. + * - LZMA_OPTIONS_ERROR: The Block Header specifies some + * unsupported options such as unsupported filters. This can + * happen also if block->version was set to a too low value + * compared to what would be required to properly represent + * the information stored in the Block Header. + * - LZMA_DATA_ERROR: Block Header is corrupt, for example, + * the CRC32 doesn't match. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block, + const lzma_allocator *allocator, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Validate and set Compressed Size according to Unpadded Size + * + * Block Header stores Compressed Size, but Index has Unpadded Size. If the + * application has already parsed the Index and is now decoding Blocks, + * it can calculate Compressed Size from Unpadded Size. This function does + * exactly that with error checking: + * + * - Compressed Size calculated from Unpadded Size must be positive integer, + * that is, Unpadded Size must be big enough that after Block Header and + * Check fields there's still at least one byte for Compressed Size. + * + * - If Compressed Size was present in Block Header, the new value + * calculated from Unpadded Size is compared against the value + * from Block Header. + * + * \note This function must be called _after_ decoding the Block Header + * field so that it can properly validate Compressed Size if it + * was present in Block Header. + * + * \return - LZMA_OK: block->compressed_size was set successfully. + * - LZMA_DATA_ERROR: unpadded_size is too small compared to + * block->header_size and lzma_check_size(block->check). + * - LZMA_PROG_ERROR: Some values are invalid. For example, + * block->header_size must be a multiple of four and + * between 8 and 1024 inclusive. + */ +extern LZMA_API(lzma_ret) lzma_block_compressed_size( + lzma_block *block, lzma_vli unpadded_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate Unpadded Size + * + * The Index field stores Unpadded Size and Uncompressed Size. The latter + * can be taken directly from the lzma_block structure after coding a Block, + * but Unpadded Size needs to be calculated from Block Header Size, + * Compressed Size, and size of the Check field. This is where this function + * is needed. + * + * \return Unpadded Size on success, or zero on error. + */ +extern LZMA_API(lzma_vli) lzma_block_unpadded_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate the total encoded size of a Block + * + * This is equivalent to lzma_block_unpadded_size() except that the returned + * value includes the size of the Block Padding field. + * + * \return On success, total encoded size of the Block. On error, + * zero is returned. + */ +extern LZMA_API(lzma_vli) lzma_block_total_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Block encoder + * + * Valid actions for lzma_code() are LZMA_RUN, LZMA_SYNC_FLUSH (only if the + * filter chain supports it), and LZMA_FINISH. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_UNSUPPORTED_CHECK: block->check specifies a Check ID + * that is not supported by this buid of liblzma. Initializing + * the encoder failed. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_encoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Block decoder + * + * Valid actions for lzma_code() are LZMA_RUN and LZMA_FINISH. Using + * LZMA_FINISH is not required. It is supported only for convenience. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but + * the given Check ID is not supported, thus Check will be + * ignored. + * - LZMA_PROG_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_decoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate maximum output size for single-call Block encoding + * + * This is equivalent to lzma_stream_buffer_bound() but for .xz Blocks. + * See the documentation of lzma_stream_buffer_bound(). + */ +extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Block encoder + * + * In contrast to the multi-call encoder initialized with + * lzma_block_encoder(), this function encodes also the Block Header. This + * is required to make it possible to write appropriate Block Header also + * in case the data isn't compressible, and different filter chain has to be + * used to encode the data in uncompressed form using uncompressed chunks + * of the LZMA2 filter. + * + * When the data isn't compressible, header_size, compressed_size, and + * uncompressed_size are set just like when the data was compressible, but + * it is possible that header_size is too small to hold the filter chain + * specified in block->filters, because that isn't necessarily the filter + * chain that was actually used to encode the data. lzma_block_unpadded_size() + * still works normally, because it doesn't read the filters array. + * + * \param block Block options: block->version, block->check, + * and block->filters must have been initialized. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_encode( + lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call uncompressed .xz Block encoder + * + * This is like lzma_block_buffer_encode() except this doesn't try to + * compress the data and instead encodes the data using LZMA2 uncompressed + * chunks. The required output buffer size can be determined with + * lzma_block_buffer_bound(). + * + * Since the data won't be compressed, this function ignores block->filters. + * This function doesn't take lzma_allocator because this function doesn't + * allocate any memory from the heap. + */ +extern LZMA_API(lzma_ret) lzma_block_uncomp_encode(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Block decoder + * + * This is single-call equivalent of lzma_block_decoder(), and requires that + * the caller has already decoded Block Header and checked its memory usage. + * + * \param block Block options just like with lzma_block_decoder(). + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_MEM_ERROR + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_decode( + lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow; diff --git a/third-party/libxz/include/lzma/check.h b/third-party/libxz/include/lzma/check.h new file mode 100644 index 00000000..6a243db0 --- /dev/null +++ b/third-party/libxz/include/lzma/check.h @@ -0,0 +1,150 @@ +/** + * \file lzma/check.h + * \brief Integrity checks + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Type of the integrity check (Check ID) + * + * The .xz format supports multiple types of checks that are calculated + * from the uncompressed data. They vary in both speed and ability to + * detect errors. + */ +typedef enum { + LZMA_CHECK_NONE = 0, + /**< + * No Check is calculated. + * + * Size of the Check field: 0 bytes + */ + + LZMA_CHECK_CRC32 = 1, + /**< + * CRC32 using the polynomial from the IEEE 802.3 standard + * + * Size of the Check field: 4 bytes + */ + + LZMA_CHECK_CRC64 = 4, + /**< + * CRC64 using the polynomial from the ECMA-182 standard + * + * Size of the Check field: 8 bytes + */ + + LZMA_CHECK_SHA256 = 10 + /**< + * SHA-256 + * + * Size of the Check field: 32 bytes + */ +} lzma_check; + + +/** + * \brief Maximum valid Check ID + * + * The .xz file format specification specifies 16 Check IDs (0-15). Some + * of them are only reserved, that is, no actual Check algorithm has been + * assigned. When decoding, liblzma still accepts unknown Check IDs for + * future compatibility. If a valid but unsupported Check ID is detected, + * liblzma can indicate a warning; see the flags LZMA_TELL_NO_CHECK, + * LZMA_TELL_UNSUPPORTED_CHECK, and LZMA_TELL_ANY_CHECK in container.h. + */ +#define LZMA_CHECK_ID_MAX 15 + + +/** + * \brief Test if the given Check ID is supported + * + * Return true if the given Check ID is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * is not in the range [0, 15]; in that case the return value is always false. + * + * You can assume that LZMA_CHECK_NONE and LZMA_CHECK_CRC32 are always + * supported (even if liblzma is built with limited features). + */ +extern LZMA_API(lzma_bool) lzma_check_is_supported(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Get the size of the Check field with the given Check ID + * + * Although not all Check IDs have a check algorithm associated, the size of + * every Check is already frozen. This function returns the size (in bytes) of + * the Check field with the specified Check ID. The values are: + * { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 } + * + * If the argument is not in the range [0, 15], UINT32_MAX is returned. + */ +extern LZMA_API(uint32_t) lzma_check_size(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Maximum size of a Check field + */ +#define LZMA_CHECK_SIZE_MAX 64 + + +/** + * \brief Calculate CRC32 + * + * Calculate CRC32 using the polynomial from the IEEE 802.3 standard. + * + * \param buf Pointer to the input buffer + * \param size Size of the input buffer + * \param crc Previously returned CRC value. This is used to + * calculate the CRC of a big buffer in smaller chunks. + * Set to zero when starting a new calculation. + * + * \return Updated CRC value, which can be passed to this function + * again to continue CRC calculation. + */ +extern LZMA_API(uint32_t) lzma_crc32( + const uint8_t *buf, size_t size, uint32_t crc) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate CRC64 + * + * Calculate CRC64 using the polynomial from the ECMA-182 standard. + * + * This function is used similarly to lzma_crc32(). See its documentation. + */ +extern LZMA_API(uint64_t) lzma_crc64( + const uint8_t *buf, size_t size, uint64_t crc) + lzma_nothrow lzma_attr_pure; + + +/* + * SHA-256 functions are currently not exported to public API. + * Contact Lasse Collin if you think it should be. + */ + + +/** + * \brief Get the type of the integrity check + * + * This function can be called only immediately after lzma_code() has + * returned LZMA_NO_CHECK, LZMA_UNSUPPORTED_CHECK, or LZMA_GET_CHECK. + * Calling this function in any other situation has undefined behavior. + */ +extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm) + lzma_nothrow; diff --git a/third-party/libxz/include/lzma/container.h b/third-party/libxz/include/lzma/container.h new file mode 100644 index 00000000..9fbf4df0 --- /dev/null +++ b/third-party/libxz/include/lzma/container.h @@ -0,0 +1,632 @@ +/** + * \file lzma/container.h + * \brief File formats + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/************ + * Encoding * + ************/ + +/** + * \brief Default compression preset + * + * It's not straightforward to recommend a default preset, because in some + * cases keeping the resource usage relatively low is more important that + * getting the maximum compression ratio. + */ +#define LZMA_PRESET_DEFAULT UINT32_C(6) + + +/** + * \brief Mask for preset level + * + * This is useful only if you need to extract the level from the preset + * variable. That should be rare. + */ +#define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F) + + +/* + * Preset flags + * + * Currently only one flag is defined. + */ + +/** + * \brief Extreme compression preset + * + * This flag modifies the preset to make the encoding significantly slower + * while improving the compression ratio only marginally. This is useful + * when you don't mind wasting time to get as small result as possible. + * + * This flag doesn't affect the memory usage requirements of the decoder (at + * least not significantly). The memory usage of the encoder may be increased + * a little but only at the lowest preset levels (0-3). + */ +#define LZMA_PRESET_EXTREME (UINT32_C(1) << 31) + + +/** + * \brief Multithreading options + */ +typedef struct { + /** + * \brief Flags + * + * Set this to zero if no flags are wanted. + * + * No flags are currently supported. + */ + uint32_t flags; + + /** + * \brief Number of worker threads to use + */ + uint32_t threads; + + /** + * \brief Maximum uncompressed size of a Block + * + * The encoder will start a new .xz Block every block_size bytes. + * Using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER with lzma_code() + * the caller may tell liblzma to start a new Block earlier. + * + * With LZMA2, a recommended block size is 2-4 times the LZMA2 + * dictionary size. With very small dictionaries, it is recommended + * to use at least 1 MiB block size for good compression ratio, even + * if this is more than four times the dictionary size. Note that + * these are only recommendations for typical use cases; feel free + * to use other values. Just keep in mind that using a block size + * less than the LZMA2 dictionary size is waste of RAM. + * + * Set this to 0 to let liblzma choose the block size depending + * on the compression options. For LZMA2 it will be 3*dict_size + * or 1 MiB, whichever is more. + * + * For each thread, about 3 * block_size bytes of memory will be + * allocated. This may change in later liblzma versions. If so, + * the memory usage will probably be reduced, not increased. + */ + uint64_t block_size; + + /** + * \brief Timeout to allow lzma_code() to return early + * + * Multithreading can make liblzma to consume input and produce + * output in a very bursty way: it may first read a lot of input + * to fill internal buffers, then no input or output occurs for + * a while. + * + * In single-threaded mode, lzma_code() won't return until it has + * either consumed all the input or filled the output buffer. If + * this is done in multithreaded mode, it may cause a call + * lzma_code() to take even tens of seconds, which isn't acceptable + * in all applications. + * + * To avoid very long blocking times in lzma_code(), a timeout + * (in milliseconds) may be set here. If lzma_code() would block + * longer than this number of milliseconds, it will return with + * LZMA_OK. Reasonable values are 100 ms or more. The xz command + * line tool uses 300 ms. + * + * If long blocking times are fine for you, set timeout to a special + * value of 0, which will disable the timeout mechanism and will make + * lzma_code() block until all the input is consumed or the output + * buffer has been filled. + * + * \note Even with a timeout, lzma_code() might sometimes take + * somewhat long time to return. No timing guarantees + * are made. + */ + uint32_t timeout; + + /** + * \brief Compression preset (level and possible flags) + * + * The preset is set just like with lzma_easy_encoder(). + * The preset is ignored if filters below is non-NULL. + */ + uint32_t preset; + + /** + * \brief Filter chain (alternative to a preset) + * + * If this is NULL, the preset above is used. Otherwise the preset + * is ignored and the filter chain specified here is used. + */ + const lzma_filter *filters; + + /** + * \brief Integrity check type + * + * See check.h for available checks. The xz command line tool + * defaults to LZMA_CHECK_CRC64, which is a good choice if you + * are unsure. + */ + lzma_check check; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + uint64_t reserved_int5; + uint64_t reserved_int6; + uint64_t reserved_int7; + uint64_t reserved_int8; + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + void *reserved_ptr4; + +} lzma_mt; + + +/** + * \brief Calculate approximate memory usage of easy encoder + * + * This function is a wrapper for lzma_raw_encoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + * + * \return Number of bytes of memory required for the given + * preset when encoding. If an error occurs, for example + * due to unsupported preset, UINT64_MAX is returned. + */ +extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate approximate decoder memory usage of a preset + * + * This function is a wrapper for lzma_raw_decoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + * + * \return Number of bytes of memory required to decompress a file + * that was compressed using the given preset. If an error + * occurs, for example due to unsupported preset, UINT64_MAX + * is returned. + */ +extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Stream encoder using a preset number + * + * This function is intended for those who just want to use the basic features + * if liblzma (that is, most developers out there). + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param preset Compression preset to use. A preset consist of level + * number and zero or more flags. Usually flags aren't + * used, so preset is simply a number [0, 9] which match + * the options -0 ... -9 of the xz command line tool. + * Additional flags can be be set using bitwise-or with + * the preset level number, e.g. 6 | LZMA_PRESET_EXTREME. + * \param check Integrity check type to use. See check.h for available + * checks. The xz command line tool defaults to + * LZMA_CHECK_CRC64, which is a good choice if you are + * unsure. LZMA_CHECK_CRC32 is good too as long as the + * uncompressed file is not many gigabytes. + * + * \return - LZMA_OK: Initialization succeeded. Use lzma_code() to + * encode your data. + * - LZMA_MEM_ERROR: Memory allocation failed. + * - LZMA_OPTIONS_ERROR: The given compression preset is not + * supported by this build of liblzma. + * - LZMA_UNSUPPORTED_CHECK: The given check type is not + * supported by this liblzma build. + * - LZMA_PROG_ERROR: One or more of the parameters have values + * that will never be valid. For example, strm == NULL. + * + * If initialization fails (return value is not LZMA_OK), all the memory + * allocated for *strm by liblzma is always freed. Thus, there is no need + * to call lzma_end() after failed initialization. + * + * If initialization succeeds, use lzma_code() to do the actual encoding. + * Valid values for `action' (the second argument of lzma_code()) are + * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, + * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH. + */ +extern LZMA_API(lzma_ret) lzma_easy_encoder( + lzma_stream *strm, uint32_t preset, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream encoding using a preset number + * + * The maximum required output buffer size can be calculated with + * lzma_stream_buffer_bound(). + * + * \param preset Compression preset to use. See the description + * in lzma_easy_encoder(). + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_easy_buffer_encode( + uint32_t preset, lzma_check check, + const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Initialize .xz Stream encoder using a custom filter chain + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h for + * more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, + const lzma_filter *filters, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate approximate memory usage of multithreaded .xz encoder + * + * Since doing the encoding in threaded mode doesn't affect the memory + * requirements of single-threaded decompressor, you can use + * lzma_easy_decoder_memusage(options->preset) or + * lzma_raw_decoder_memusage(options->filters) to calculate + * the decompressor memory requirements. + * + * \param options Compression options + * + * \return Number of bytes of memory required for encoding with the + * given options. If an error occurs, for example due to + * unsupported preset or filter chain, UINT64_MAX is returned. + */ +extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage( + const lzma_mt *options) lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize multithreaded .xz Stream encoder + * + * This provides the functionality of lzma_easy_encoder() and + * lzma_stream_encoder() as a single function for multithreaded use. + * + * The supported actions for lzma_code() are LZMA_RUN, LZMA_FULL_FLUSH, + * LZMA_FULL_BARRIER, and LZMA_FINISH. Support for LZMA_SYNC_FLUSH might be + * added in the future. + * + * \param strm Pointer to properly prepared lzma_stream + * \param options Pointer to multithreaded compression options + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_encoder_mt( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma encoder (legacy file format) + * + * The .lzma format is sometimes called the LZMA_Alone format, which is the + * reason for the name of this function. The .lzma format supports only the + * LZMA1 filter. There is no support for integrity checks like CRC32. + * + * Use this function if and only if you need to create files readable by + * legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format + * is strongly recommended. + * + * The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * No kind of flushing is supported, because the file format doesn't make + * it possible. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_encoder( + lzma_stream *strm, const lzma_options_lzma *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate output buffer size for single-call Stream encoder + * + * When trying to compress uncompressible data, the encoded size will be + * slightly bigger than the input data. This function calculates how much + * output buffer space is required to be sure that lzma_stream_buffer_encode() + * doesn't return LZMA_BUF_ERROR. + * + * The calculated value is not exact, but it is guaranteed to be big enough. + * The actual maximum output space required may be slightly smaller (up to + * about 100 bytes). This should not be a problem in practice. + * + * If the calculated maximum size doesn't fit into size_t or would make the + * Stream grow past LZMA_VLI_MAX (which should never happen in practice), + * zero is returned to indicate the error. + * + * \note The limit calculated by this function applies only to + * single-call encoding. Multi-call encoding may (and probably + * will) have larger maximum expansion when encoding + * uncompressible data. Currently there is no function to + * calculate the maximum expansion of multi-call encoding. + */ +extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Stream encoder + * + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h + * for more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_encode( + lzma_filter *filters, lzma_check check, + const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/************ + * Decoding * + ************/ + +/** + * This flag makes lzma_code() return LZMA_NO_CHECK if the input stream + * being decoded has no integrity check. Note that when used with + * lzma_auto_decoder(), all .lzma files will trigger LZMA_NO_CHECK + * if LZMA_TELL_NO_CHECK is used. + */ +#define LZMA_TELL_NO_CHECK UINT32_C(0x01) + + +/** + * This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input + * stream has an integrity check, but the type of the integrity check is not + * supported by this liblzma version or build. Such files can still be + * decoded, but the integrity check cannot be verified. + */ +#define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02) + + +/** + * This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type + * of the integrity check is known. The type can then be got with + * lzma_get_check(). + */ +#define LZMA_TELL_ANY_CHECK UINT32_C(0x04) + + +/** + * This flag makes lzma_code() not calculate and verify the integrity check + * of the compressed data in .xz files. This means that invalid integrity + * check values won't be detected and LZMA_DATA_ERROR won't be returned in + * such cases. + * + * This flag only affects the checks of the compressed data itself; the CRC32 + * values in the .xz headers will still be verified normally. + * + * Don't use this flag unless you know what you are doing. Possible reasons + * to use this flag: + * + * - Trying to recover data from a corrupt .xz file. + * + * - Speeding up decompression, which matters mostly with SHA-256 + * or with files that have compressed extremely well. It's recommended + * to not use this flag for this purpose unless the file integrity is + * verified externally in some other way. + * + * Support for this flag was added in liblzma 5.1.4beta. + */ +#define LZMA_IGNORE_CHECK UINT32_C(0x10) + + +/** + * This flag enables decoding of concatenated files with file formats that + * allow concatenating compressed files as is. From the formats currently + * supported by liblzma, only the .xz format allows concatenated files. + * Concatenated files are not allowed with the legacy .lzma format. + * + * This flag also affects the usage of the `action' argument for lzma_code(). + * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END + * unless LZMA_FINISH is used as `action'. Thus, the application has to set + * LZMA_FINISH in the same way as it does when encoding. + * + * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH + * as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required. + */ +#define LZMA_CONCATENATED UINT32_C(0x08) + + +/** + * \brief Initialize .xz Stream decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. liblzma + * 5.2.3 and earlier don't allow 0 here and return + * LZMA_PROG_ERROR; later versions treat 0 as if 1 + * had been specified. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode .xz Streams and .lzma files with autodetection + * + * This decoder autodetects between the .xz and .lzma file formats, and + * calls lzma_stream_decoder() or lzma_alone_decoder() once the type + * of the input file has been detected. + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. liblzma + * 5.2.3 and earlier don't allow 0 here and return + * LZMA_PROG_ERROR; later versions treat 0 as if 1 + * had been specified. + * \param flags Bitwise-or of flags, or zero for no flags. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_auto_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma decoder (legacy file format) + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. liblzma + * 5.2.3 and earlier don't allow 0 here and return + * LZMA_PROG_ERROR; later versions treat 0 as if 1 + * had been specified. + * + * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * There is no need to use LZMA_FINISH, but it's allowed because it may + * simplify certain types of applications. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_decoder( + lzma_stream *strm, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream decoder + * + * \param memlimit Pointer to how much memory the decoder is allowed + * to allocate. The value pointed by this pointer is + * modified if and only if LZMA_MEMLIMIT_ERROR is + * returned. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK + * is not allowed and will return LZMA_PROG_ERROR. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_NO_CHECK: This can be returned only if using + * the LZMA_TELL_NO_CHECK flag. + * - LZMA_UNSUPPORTED_CHECK: This can be returned only if using + * the LZMA_TELL_UNSUPPORTED_CHECK flag. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( + uint64_t *memlimit, uint32_t flags, + const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; diff --git a/third-party/libxz/include/lzma/delta.h b/third-party/libxz/include/lzma/delta.h new file mode 100644 index 00000000..592fc4f8 --- /dev/null +++ b/third-party/libxz/include/lzma/delta.h @@ -0,0 +1,77 @@ +/** + * \file lzma/delta.h + * \brief Delta filter + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Delta filter. This is used as lzma_filter.id. + */ +#define LZMA_FILTER_DELTA LZMA_VLI_C(0x03) + + +/** + * \brief Type of the delta calculation + * + * Currently only byte-wise delta is supported. Other possible types could + * be, for example, delta of 16/32/64-bit little/big endian integers, but + * these are not currently planned since byte-wise delta is almost as good. + */ +typedef enum { + LZMA_DELTA_TYPE_BYTE +} lzma_delta_type; + + +/** + * \brief Options for the Delta filter + * + * These options are needed by both encoder and decoder. + */ +typedef struct { + /** For now, this must always be LZMA_DELTA_TYPE_BYTE. */ + lzma_delta_type type; + + /** + * \brief Delta distance + * + * With the only currently supported type, LZMA_DELTA_TYPE_BYTE, + * the distance is as bytes. + * + * Examples: + * - 16-bit stereo audio: distance = 4 bytes + * - 24-bit RGB image data: distance = 3 bytes + */ + uint32_t dist; +# define LZMA_DELTA_DIST_MIN 1 +# define LZMA_DELTA_DIST_MAX 256 + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * when type is LZMA_DELTA_TYPE_BYTE, so it is safe to leave these + * uninitialized. + */ + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + void *reserved_ptr1; + void *reserved_ptr2; + +} lzma_options_delta; diff --git a/third-party/libxz/include/lzma/filter.h b/third-party/libxz/include/lzma/filter.h new file mode 100644 index 00000000..4e78752b --- /dev/null +++ b/third-party/libxz/include/lzma/filter.h @@ -0,0 +1,425 @@ +/** + * \file lzma/filter.h + * \brief Common filter related types and functions + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Maximum number of filters in a chain + * + * A filter chain can have 1-4 filters, of which three are allowed to change + * the size of the data. Usually only one or two filters are needed. + */ +#define LZMA_FILTERS_MAX 4 + + +/** + * \brief Filter options + * + * This structure is used to pass Filter ID and a pointer filter's + * options to liblzma. A few functions work with a single lzma_filter + * structure, while most functions expect a filter chain. + * + * A filter chain is indicated with an array of lzma_filter structures. + * The array is terminated with .id = LZMA_VLI_UNKNOWN. Thus, the filter + * array must have LZMA_FILTERS_MAX + 1 elements (that is, five) to + * be able to hold any arbitrary filter chain. This is important when + * using lzma_block_header_decode() from block.h, because too small + * array would make liblzma write past the end of the filters array. + */ +typedef struct { + /** + * \brief Filter ID + * + * Use constants whose name begin with `LZMA_FILTER_' to specify + * different filters. In an array of lzma_filter structures, use + * LZMA_VLI_UNKNOWN to indicate end of filters. + * + * \note This is not an enum, because on some systems enums + * cannot be 64-bit. + */ + lzma_vli id; + + /** + * \brief Pointer to filter-specific options structure + * + * If the filter doesn't need options, set this to NULL. If id is + * set to LZMA_VLI_UNKNOWN, options is ignored, and thus + * doesn't need be initialized. + */ + void *options; + +} lzma_filter; + + +/** + * \brief Test if the given Filter ID is supported for encoding + * + * Return true if the give Filter ID is supported for encoding by this + * liblzma build. Otherwise false is returned. + * + * There is no way to list which filters are available in this particular + * liblzma version and build. It would be useless, because the application + * couldn't know what kind of options the filter would need. + */ +extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Test if the given Filter ID is supported for decoding + * + * Return true if the give Filter ID is supported for decoding by this + * liblzma build. Otherwise false is returned. + */ +extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Copy the filters array + * + * Copy the Filter IDs and filter-specific options from src to dest. + * Up to LZMA_FILTERS_MAX filters are copied, plus the terminating + * .id == LZMA_VLI_UNKNOWN. Thus, dest should have at least + * LZMA_FILTERS_MAX + 1 elements space unless the caller knows that + * src is smaller than that. + * + * Unless the filter-specific options is NULL, the Filter ID has to be + * supported by liblzma, because liblzma needs to know the size of every + * filter-specific options structure. The filter-specific options are not + * validated. If options is NULL, any unsupported Filter IDs are copied + * without returning an error. + * + * Old filter-specific options in dest are not freed, so dest doesn't + * need to be initialized by the caller in any way. + * + * If an error occurs, memory possibly already allocated by this function + * is always freed. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR: Unsupported Filter ID and its options + * is not NULL. + * - LZMA_PROG_ERROR: src or dest is NULL. + */ +extern LZMA_API(lzma_ret) lzma_filters_copy( + const lzma_filter *src, lzma_filter *dest, + const lzma_allocator *allocator) lzma_nothrow; + + +/** + * \brief Calculate approximate memory requirements for raw encoder + * + * This function can be used to calculate the memory requirements for + * Block and Stream encoders too because Block and Stream encoders don't + * need significantly more memory than raw encoder. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Number of bytes of memory required for the given + * filter chain when encoding. If an error occurs, + * for example due to unsupported filter chain, + * UINT64_MAX is returned. + */ +extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate approximate memory requirements for raw decoder + * + * This function can be used to calculate the memory requirements for + * Block and Stream decoders too because Block and Stream decoders don't + * need significantly more memory than raw decoder. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Number of bytes of memory required for the given + * filter chain when decoding. If an error occurs, + * for example due to unsupported filter chain, + * UINT64_MAX is returned. + */ +extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize raw encoder + * + * This function may be useful when implementing custom file formats. + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * + * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the + * filter chain supports it), or LZMA_FINISH. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_encoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize raw decoder + * + * The initialization of raw decoder goes similarly to raw encoder. + * + * The `action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using + * LZMA_FINISH is not required, it is supported just for convenience. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_decoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Update the filter chain in the encoder + * + * This function is for advanced users only. This function has two slightly + * different purposes: + * + * - After LZMA_FULL_FLUSH when using Stream encoder: Set a new filter + * chain, which will be used starting from the next Block. + * + * - After LZMA_SYNC_FLUSH using Raw, Block, or Stream encoder: Change + * the filter-specific options in the middle of encoding. The actual + * filters in the chain (Filter IDs) cannot be changed. In the future, + * it might become possible to change the filter options without + * using LZMA_SYNC_FLUSH. + * + * While rarely useful, this function may be called also when no data has + * been compressed yet. In that case, this function will behave as if + * LZMA_FULL_FLUSH (Stream encoder) or LZMA_SYNC_FLUSH (Raw or Block + * encoder) had been used right before calling this function. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filters_update( + lzma_stream *strm, const lzma_filter *filters) lzma_nothrow; + + +/** + * \brief Single-call raw encoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + * + * \note There is no function to calculate how big output buffer + * would surely be big enough. (lzma_stream_buffer_bound() + * works only for lzma_stream_buffer_encode(); raw encoder + * won't necessarily meet that bound.) + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_encode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, uint8_t *out, + size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call raw decoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_decode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Get the size of the Filter Properties field + * + * This function may be useful when implementing custom file formats + * using the raw encoder and decoder. + * + * \param size Pointer to uint32_t to hold the size of the properties + * \param filter Filter ID and options (the size of the properties may + * vary depending on the options) + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note This function validates the Filter ID, but does not + * necessarily validate the options. Thus, it is possible + * that this returns LZMA_OK while the following call to + * lzma_properties_encode() returns LZMA_OPTIONS_ERROR. + */ +extern LZMA_API(lzma_ret) lzma_properties_size( + uint32_t *size, const lzma_filter *filter) lzma_nothrow; + + +/** + * \brief Encode the Filter Properties field + * + * \param filter Filter ID and options + * \param props Buffer to hold the encoded options. The size of + * buffer must have been already determined with + * lzma_properties_size(). + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note Even this function won't validate more options than actually + * necessary. Thus, it is possible that encoding the properties + * succeeds but using the same options to initialize the encoder + * will fail. + * + * \note If lzma_properties_size() indicated that the size + * of the Filter Properties field is zero, calling + * lzma_properties_encode() is not required, but it + * won't do any harm either. + */ +extern LZMA_API(lzma_ret) lzma_properties_encode( + const lzma_filter *filter, uint8_t *props) lzma_nothrow; + + +/** + * \brief Decode the Filter Properties field + * + * \param filter filter->id must have been set to the correct + * Filter ID. filter->options doesn't need to be + * initialized (it's not freed by this function). The + * decoded options will be stored to filter->options. + * filter->options is set to NULL if there are no + * properties or if an error occurs. + * \param allocator Custom memory allocator used to allocate the + * options. Set to NULL to use the default malloc(), + * and in case of an error, also free(). + * \param props Input buffer containing the properties. + * \param props_size Size of the properties. This must be the exact + * size; giving too much or too little input will + * return LZMA_OPTIONS_ERROR. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_properties_decode( + lzma_filter *filter, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size) lzma_nothrow; + + +/** + * \brief Calculate encoded size of a Filter Flags field + * + * Knowing the size of Filter Flags is useful to know when allocating + * memory to hold the encoded Filter Flags. + * + * \param size Pointer to integer to hold the calculated size + * \param filter Filter ID and associated options whose encoded + * size is to be calculated + * + * \return - LZMA_OK: *size set successfully. Note that this doesn't + * guarantee that filter->options is valid, thus + * lzma_filter_flags_encode() may still fail. + * - LZMA_OPTIONS_ERROR: Unknown Filter ID or unsupported options. + * - LZMA_PROG_ERROR: Invalid options + * + * \note If you need to calculate size of List of Filter Flags, + * you need to loop over every lzma_filter entry. + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_size( + uint32_t *size, const lzma_filter *filter) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Filter Flags into given buffer + * + * In contrast to some functions, this doesn't allocate the needed buffer. + * This is due to how this function is used internally by liblzma. + * + * \param filter Filter ID and options to be encoded + * \param out Beginning of the output buffer + * \param out_pos out[*out_pos] is the next write position. This + * is updated by the encoder. + * \param out_size out[out_size] is the first byte to not write. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid options or not enough output + * buffer space (you should have checked it with + * lzma_filter_flags_size()). + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filter, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Filter Flags from given buffer + * + * The decoded result is stored into *filter. The old value of + * filter->options is not free()d. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_decode( + lzma_filter *filter, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; diff --git a/third-party/libxz/include/lzma/hardware.h b/third-party/libxz/include/lzma/hardware.h new file mode 100644 index 00000000..5321d9af --- /dev/null +++ b/third-party/libxz/include/lzma/hardware.h @@ -0,0 +1,64 @@ +/** + * \file lzma/hardware.h + * \brief Hardware information + * + * Since liblzma can consume a lot of system resources, it also provides + * ways to limit the resource usage. Applications linking against liblzma + * need to do the actual decisions how much resources to let liblzma to use. + * To ease making these decisions, liblzma provides functions to find out + * the relevant capabilities of the underlaying hardware. Currently there + * is only a function to find out the amount of RAM, but in the future there + * will be also a function to detect how many concurrent threads the system + * can run. + * + * \note On some operating systems, these function may temporarily + * load a shared library or open file descriptor(s) to find out + * the requested hardware information. Unless the application + * assumes that specific file descriptors are not touched by + * other threads, this should have no effect on thread safety. + * Possible operations involving file descriptors will restart + * the syscalls if they return EINTR. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Get the total amount of physical memory (RAM) in bytes + * + * This function may be useful when determining a reasonable memory + * usage limit for decompressing or how much memory it is OK to use + * for compressing. + * + * \return On success, the total amount of physical memory in bytes + * is returned. If the amount of RAM cannot be determined, + * zero is returned. This can happen if an error occurs + * or if there is no code in liblzma to detect the amount + * of RAM on the specific operating system. + */ +extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow; + + +/** + * \brief Get the number of processor cores or threads + * + * This function may be useful when determining how many threads to use. + * If the hardware supports more than one thread per CPU core, the number + * of hardware threads is returned if that information is available. + * + * \brief On success, the number of available CPU threads or cores is + * returned. If this information isn't available or an error + * occurs, zero is returned. + */ +extern LZMA_API(uint32_t) lzma_cputhreads(void) lzma_nothrow; diff --git a/third-party/libxz/include/lzma/index.h b/third-party/libxz/include/lzma/index.h new file mode 100644 index 00000000..3dac6fb8 --- /dev/null +++ b/third-party/libxz/include/lzma/index.h @@ -0,0 +1,686 @@ +/** + * \file lzma/index.h + * \brief Handling of .xz Index and related information + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Opaque data type to hold the Index(es) and other information + * + * lzma_index often holds just one .xz Index and possibly the Stream Flags + * of the same Stream and size of the Stream Padding field. However, + * multiple lzma_indexes can be concatenated with lzma_index_cat() and then + * there may be information about multiple Streams in the same lzma_index. + * + * Notes about thread safety: Only one thread may modify lzma_index at + * a time. All functions that take non-const pointer to lzma_index + * modify it. As long as no thread is modifying the lzma_index, getting + * information from the same lzma_index can be done from multiple threads + * at the same time with functions that take a const pointer to + * lzma_index or use lzma_index_iter. The same iterator must be used + * only by one thread at a time, of course, but there can be as many + * iterators for the same lzma_index as needed. + */ +typedef struct lzma_index_s lzma_index; + + +/** + * \brief Iterator to get information about Blocks and Streams + */ +typedef struct { + struct { + /** + * \brief Pointer to Stream Flags + * + * This is NULL if Stream Flags have not been set for + * this Stream with lzma_index_stream_flags(). + */ + const lzma_stream_flags *flags; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + + /** + * \brief Stream number in the lzma_index + * + * The first Stream is 1. + */ + lzma_vli number; + + /** + * \brief Number of Blocks in the Stream + * + * If this is zero, the block structure below has + * undefined values. + */ + lzma_vli block_count; + + /** + * \brief Compressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli compressed_offset; + + /** + * \brief Uncompressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli uncompressed_offset; + + /** + * \brief Compressed size of this Stream + * + * This includes all headers except the possible + * Stream Padding after this Stream. + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed size of this Stream + */ + lzma_vli uncompressed_size; + + /** + * \brief Size of Stream Padding after this Stream + * + * If it hasn't been set with lzma_index_stream_padding(), + * this defaults to zero. Stream Padding is always + * a multiple of four bytes. + */ + lzma_vli padding; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + } stream; + + struct { + /** + * \brief Block number in the file + * + * The first Block is 1. + */ + lzma_vli number_in_file; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the + * lzma_index (i.e. usually the beginning of the .xz file). + * Normally this is where you should seek in the .xz file + * to start decompressing this Block. + */ + lzma_vli compressed_file_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + * + * When doing random-access reading, it is possible that + * the target offset is not exactly at Block boundary. One + * will need to compare the target offset against + * uncompressed_file_offset or uncompressed_stream_offset, + * and possibly decode and throw away some amount of data + * before reaching the target offset. + */ + lzma_vli uncompressed_file_offset; + + /** + * \brief Block number in this Stream + * + * The first Block is 1. + */ + lzma_vli number_in_stream; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli compressed_stream_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli uncompressed_stream_offset; + + /** + * \brief Uncompressed size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. It will allow the Block decoder to + * validate the uncompressed size. + */ + lzma_vli uncompressed_size; + + /** + * \brief Unpadded size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. It will allow the Block decoder to + * validate the unpadded size. + */ + lzma_vli unpadded_size; + + /** + * \brief Total compressed size + * + * This includes all headers and padding in this Block. + * This is useful if you need to know how many bytes + * the Block decoder will actually read. + */ + lzma_vli total_size; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + const void *reserved_ptr4; + } block; + + /* + * Internal data which is used to store the state of the iterator. + * The exact format may vary between liblzma versions, so don't + * touch these in any way. + */ + union { + const void *p; + size_t s; + lzma_vli v; + } internal[6]; +} lzma_index_iter; + + +/** + * \brief Operation mode for lzma_index_iter_next() + */ +typedef enum { + LZMA_INDEX_ITER_ANY = 0, + /**< + * \brief Get the next Block or Stream + * + * Go to the next Block if the current Stream has at least + * one Block left. Otherwise go to the next Stream even if + * it has no Blocks. If the Stream has no Blocks + * (lzma_index_iter.stream.block_count == 0), + * lzma_index_iter.block will have undefined values. + */ + + LZMA_INDEX_ITER_STREAM = 1, + /**< + * \brief Get the next Stream + * + * Go to the next Stream even if the current Stream has + * unread Blocks left. If the next Stream has at least one + * Block, the iterator will point to the first Block. + * If there are no Blocks, lzma_index_iter.block will have + * undefined values. + */ + + LZMA_INDEX_ITER_BLOCK = 2, + /**< + * \brief Get the next Block + * + * Go to the next Block if the current Stream has at least + * one Block left. If the current Stream has no Blocks left, + * the next Stream with at least one Block is located and + * the iterator will be made to point to the first Block of + * that Stream. + */ + + LZMA_INDEX_ITER_NONEMPTY_BLOCK = 3 + /**< + * \brief Get the next non-empty Block + * + * This is like LZMA_INDEX_ITER_BLOCK except that it will + * skip Blocks whose Uncompressed Size is zero. + */ + +} lzma_index_iter_mode; + + +/** + * \brief Calculate memory usage of lzma_index + * + * On disk, the size of the Index field depends on both the number of Records + * stored and how big values the Records store (due to variable-length integer + * encoding). When the Index is kept in lzma_index structure, the memory usage + * depends only on the number of Records/Blocks stored in the Index(es), and + * in case of concatenated lzma_indexes, the number of Streams. The size in + * RAM is almost always significantly bigger than in the encoded form on disk. + * + * This function calculates an approximate amount of memory needed hold + * the given number of Streams and Blocks in lzma_index structure. This + * value may vary between CPU architectures and also between liblzma versions + * if the internal implementation is modified. + */ +extern LZMA_API(uint64_t) lzma_index_memusage( + lzma_vli streams, lzma_vli blocks) lzma_nothrow; + + +/** + * \brief Calculate the memory usage of an existing lzma_index + * + * This is a shorthand for lzma_index_memusage(lzma_index_stream_count(i), + * lzma_index_block_count(i)). + */ +extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i) + lzma_nothrow; + + +/** + * \brief Allocate and initialize a new lzma_index structure + * + * \return On success, a pointer to an empty initialized lzma_index is + * returned. If allocation fails, NULL is returned. + */ +extern LZMA_API(lzma_index *) lzma_index_init(const lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Deallocate lzma_index + * + * If i is NULL, this does nothing. + */ +extern LZMA_API(void) lzma_index_end( + lzma_index *i, const lzma_allocator *allocator) lzma_nothrow; + + +/** + * \brief Add a new Block to lzma_index + * + * \param i Pointer to a lzma_index structure + * \param allocator Pointer to lzma_allocator, or NULL to + * use malloc() + * \param unpadded_size Unpadded Size of a Block. This can be + * calculated with lzma_block_unpadded_size() + * after encoding or decoding the Block. + * \param uncompressed_size Uncompressed Size of a Block. This can be + * taken directly from lzma_block structure + * after encoding or decoding the Block. + * + * Appending a new Block does not invalidate iterators. For example, + * if an iterator was pointing to the end of the lzma_index, after + * lzma_index_append() it is possible to read the next Block with + * an existing iterator. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_append( + lzma_index *i, const lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Set the Stream Flags + * + * Set the Stream Flags of the last (and typically the only) Stream + * in lzma_index. This can be useful when reading information from the + * lzma_index, because to decode Blocks, knowing the integrity check type + * is needed. + * + * The given Stream Flags are copied into internal preallocated structure + * in the lzma_index, thus the caller doesn't need to keep the *stream_flags + * available after calling this function. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR: Unsupported stream_flags->version. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_flags( + lzma_index *i, const lzma_stream_flags *stream_flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the types of integrity Checks + * + * If lzma_index_stream_flags() is used to set the Stream Flags for + * every Stream, lzma_index_checks() can be used to get a bitmask to + * indicate which Check types have been used. It can be useful e.g. if + * showing the Check types to the user. + * + * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10. + */ +extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the amount of Stream Padding + * + * Set the amount of Stream Padding of the last (and typically the only) + * Stream in the lzma_index. This is needed when planning to do random-access + * reading within multiple concatenated Streams. + * + * By default, the amount of Stream Padding is assumed to be zero bytes. + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: The file size would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_padding( + lzma_index *i, lzma_vli stream_padding) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the number of Streams + */ +extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the number of Blocks + * + * This returns the total number of Blocks in lzma_index. To get number + * of Blocks in individual Streams, use lzma_index_iter. + */ +extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + */ +extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Stream + * + * If multiple lzma_indexes have been combined, this works as if the Blocks + * were in a single Stream. This is useful if you are going to combine + * Blocks from multiple Streams into a single new Stream. + */ +extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Blocks + * + * This doesn't include the Stream Header, Stream Footer, Stream Padding, + * or Index fields. + */ +extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the file + * + * When no lzma_indexes have been combined with lzma_index_cat() and there is + * no Stream Padding, this function is identical to lzma_index_stream_size(). + * If multiple lzma_indexes have been combined, this includes also the headers + * of each separate Stream and the possible Stream Padding fields. + */ +extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the uncompressed size of the file + */ +extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize an iterator + * + * \param iter Pointer to a lzma_index_iter structure + * \param i lzma_index to which the iterator will be associated + * + * This function associates the iterator with the given lzma_index, and calls + * lzma_index_iter_rewind() on the iterator. + * + * This function doesn't allocate any memory, thus there is no + * lzma_index_iter_end(). The iterator is valid as long as the + * associated lzma_index is valid, that is, until lzma_index_end() or + * using it as source in lzma_index_cat(). Specifically, lzma_index doesn't + * become invalid if new Blocks are added to it with lzma_index_append() or + * if it is used as the destination in lzma_index_cat(). + * + * It is safe to make copies of an initialized lzma_index_iter, for example, + * to easily restart reading at some particular position. + */ +extern LZMA_API(void) lzma_index_iter_init( + lzma_index_iter *iter, const lzma_index *i) lzma_nothrow; + + +/** + * \brief Rewind the iterator + * + * Rewind the iterator so that next call to lzma_index_iter_next() will + * return the first Block or Stream. + */ +extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter) + lzma_nothrow; + + +/** + * \brief Get the next Block or Stream + * + * \param iter Iterator initialized with lzma_index_iter_init() + * \param mode Specify what kind of information the caller wants + * to get. See lzma_index_iter_mode for details. + * + * \return If next Block or Stream matching the mode was found, *iter + * is updated and this function returns false. If no Block or + * Stream matching the mode is found, *iter is not modified + * and this function returns true. If mode is set to an unknown + * value, *iter is not modified and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_next( + lzma_index_iter *iter, lzma_index_iter_mode mode) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Locate a Block + * + * If it is possible to seek in the .xz file, it is possible to parse + * the Index field(s) and use lzma_index_iter_locate() to do random-access + * reading with granularity of Block size. + * + * \param iter Iterator that was earlier initialized with + * lzma_index_iter_init(). + * \param target Uncompressed target offset which the caller would + * like to locate from the Stream + * + * If the target is smaller than the uncompressed size of the Stream (can be + * checked with lzma_index_uncompressed_size()): + * - Information about the Stream and Block containing the requested + * uncompressed offset is stored into *iter. + * - Internal state of the iterator is adjusted so that + * lzma_index_iter_next() can be used to read subsequent Blocks or Streams. + * - This function returns false. + * + * If target is greater than the uncompressed size of the Stream, *iter + * is not modified, and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_locate( + lzma_index_iter *iter, lzma_vli target) lzma_nothrow; + + +/** + * \brief Concatenate lzma_indexes + * + * Concatenating lzma_indexes is useful when doing random-access reading in + * multi-Stream .xz file, or when combining multiple Streams into single + * Stream. + * + * \param dest lzma_index after which src is appended + * \param src lzma_index to be appended after dest. If this + * function succeeds, the memory allocated for src + * is freed or moved to be part of dest, and all + * iterators pointing to src will become invalid. + * \param allocator Custom memory allocator; can be NULL to use + * malloc() and free(). + * + * \return - LZMA_OK: lzma_indexes were concatenated successfully. + * src is now a dangling pointer. + * - LZMA_DATA_ERROR: *dest would grow too big. + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *dest, lzma_index *src, + const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Duplicate lzma_index + * + * \return A copy of the lzma_index, or NULL if memory allocation failed. + */ +extern LZMA_API(lzma_index *) lzma_index_dup( + const lzma_index *i, const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index encoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i Pointer to lzma_index which should be encoded. + * + * The valid `action' values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * It is enough to use only one of them (you can choose freely). + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_encoder( + lzma_stream *strm, const lzma_index *i) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i The decoded Index will be made available via + * this pointer. Initially this function will + * set *i to NULL (the old value is ignored). If + * decoding succeeds (lzma_code() returns + * LZMA_STREAM_END), *i will be set to point + * to a new lzma_index, which the application + * has to later free with lzma_index_end(). + * \param memlimit How much memory the resulting lzma_index is + * allowed to require. liblzma 5.2.3 and earlier + * don't allow 0 here and return LZMA_PROG_ERROR; + * later versions treat 0 as if 1 had been specified. + * + * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * There is no need to use LZMA_FINISH, but it's allowed because it may + * simplify certain types of applications. + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + * + * liblzma 5.2.3 and older list also LZMA_MEMLIMIT_ERROR here + * but that error code has never been possible from this + * initialization function. + */ +extern LZMA_API(lzma_ret) lzma_index_decoder( + lzma_stream *strm, lzma_index **i, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Index encoder + * + * \param i lzma_index to be encoded + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Output buffer is too small. Use + * lzma_index_size() to find out how much output + * space is needed. + * - LZMA_PROG_ERROR + * + * \note This function doesn't take allocator argument since all + * the internal data is allocated on stack. + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call .xz Index decoder + * + * \param i If decoding succeeds, *i will point to a new + * lzma_index, which the application has to + * later free with lzma_index_end(). If an error + * occurs, *i will be NULL. The old value of *i + * is always ignored and thus doesn't need to be + * initialized by the caller. + * \param memlimit Pointer to how much memory the resulting + * lzma_index is allowed to require. The value + * pointed by this pointer is modified if and only + * if LZMA_MEMLIMIT_ERROR is returned. + * \param allocator Pointer to lzma_allocator, or NULL to use malloc() + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i, + uint64_t *memlimit, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; diff --git a/third-party/libxz/include/lzma/index_hash.h b/third-party/libxz/include/lzma/index_hash.h new file mode 100644 index 00000000..9287f1df --- /dev/null +++ b/third-party/libxz/include/lzma/index_hash.h @@ -0,0 +1,107 @@ +/** + * \file lzma/index_hash.h + * \brief Validate Index by using a hash function + * + * Hashing makes it possible to use constant amount of memory to validate + * Index of arbitrary size. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + +/** + * \brief Opaque data type to hold the Index hash + */ +typedef struct lzma_index_hash_s lzma_index_hash; + + +/** + * \brief Allocate and initialize a new lzma_index_hash structure + * + * If index_hash is NULL, a new lzma_index_hash structure is allocated, + * initialized, and a pointer to it returned. If allocation fails, NULL + * is returned. + * + * If index_hash is non-NULL, it is reinitialized and the same pointer + * returned. In this case, return value cannot be NULL or a different + * pointer than the index_hash that was given as an argument. + */ +extern LZMA_API(lzma_index_hash *) lzma_index_hash_init( + lzma_index_hash *index_hash, const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Deallocate lzma_index_hash structure + */ +extern LZMA_API(void) lzma_index_hash_end( + lzma_index_hash *index_hash, const lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Add a new Record to an Index hash + * + * \param index Pointer to a lzma_index_hash structure + * \param unpadded_size Unpadded Size of a Block + * \param uncompressed_size Uncompressed Size of a Block + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR: Invalid arguments or this function is being + * used when lzma_index_hash_decode() has already been used. + */ +extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode and validate the Index field + * + * After telling the sizes of all Blocks with lzma_index_hash_append(), + * the actual Index field is decoded with this function. Specifically, + * once decoding of the Index field has been started, no more Records + * can be added using lzma_index_hash_append(). + * + * This function doesn't use lzma_stream structure to pass the input data. + * Instead, the input buffer is specified using three arguments. This is + * because it matches better the internal APIs of liblzma. + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param in Pointer to the beginning of the input buffer + * \param in_pos in[*in_pos] is the next byte to process + * \param in_size in[in_size] is the first byte not to process + * + * \return - LZMA_OK: So far good, but more input is needed. + * - LZMA_STREAM_END: Index decoded successfully and it matches + * the Records given with lzma_index_hash_append(). + * - LZMA_DATA_ERROR: Index is corrupt or doesn't match the + * information given with lzma_index_hash_append(). + * - LZMA_BUF_ERROR: Cannot progress because *in_pos >= in_size. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + */ +extern LZMA_API(lzma_vli) lzma_index_hash_size( + const lzma_index_hash *index_hash) + lzma_nothrow lzma_attr_pure; diff --git a/third-party/libxz/include/lzma/lzma12.h b/third-party/libxz/include/lzma/lzma12.h new file mode 100644 index 00000000..4e32fa3a --- /dev/null +++ b/third-party/libxz/include/lzma/lzma12.h @@ -0,0 +1,420 @@ +/** + * \file lzma/lzma12.h + * \brief LZMA1 and LZMA2 filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief LZMA1 Filter ID + * + * LZMA1 is the very same thing as what was called just LZMA in LZMA Utils, + * 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from + * accidentally using LZMA when they actually want LZMA2. + * + * LZMA1 shouldn't be used for new applications unless you _really_ know + * what you are doing. LZMA2 is almost always a better choice. + */ +#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001) + +/** + * \brief LZMA2 Filter ID + * + * Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds + * support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion + * when trying to compress uncompressible data), possibility to change + * lc/lp/pb in the middle of encoding, and some other internal improvements. + */ +#define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21) + + +/** + * \brief Match finders + * + * Match finder has major effect on both speed and compression ratio. + * Usually hash chains are faster than binary trees. + * + * If you will use LZMA_SYNC_FLUSH often, the hash chains may be a better + * choice, because binary trees get much higher compression ratio penalty + * with LZMA_SYNC_FLUSH. + * + * The memory usage formulas are only rough estimates, which are closest to + * reality when dict_size is a power of two. The formulas are more complex + * in reality, and can also change a little between liblzma versions. Use + * lzma_raw_encoder_memusage() to get more accurate estimate of memory usage. + */ +typedef enum { + LZMA_MF_HC3 = 0x03, + /**< + * \brief Hash Chain with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 7.5 + * - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB + */ + + LZMA_MF_HC4 = 0x04, + /**< + * \brief Hash Chain with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: + * - dict_size <= 32 MiB: dict_size * 7.5 + * - dict_size > 32 MiB: dict_size * 6.5 + */ + + LZMA_MF_BT2 = 0x12, + /**< + * \brief Binary Tree with 2-byte hashing + * + * Minimum nice_len: 2 + * + * Memory usage: dict_size * 9.5 + */ + + LZMA_MF_BT3 = 0x13, + /**< + * \brief Binary Tree with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 11.5 + * - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB + */ + + LZMA_MF_BT4 = 0x14 + /**< + * \brief Binary Tree with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: + * - dict_size <= 32 MiB: dict_size * 11.5 + * - dict_size > 32 MiB: dict_size * 10.5 + */ +} lzma_match_finder; + + +/** + * \brief Test if given match finder is supported + * + * Return true if the given match finder is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * isn't listed in lzma_match_finder enumeration; the return value will be + * false. + * + * There is no way to list which match finders are available in this + * particular liblzma version and build. It would be useless, because + * a new match finder, which the application developer wasn't aware, + * could require giving additional options to the encoder that the older + * match finders don't need. + */ +extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Compression modes + * + * This selects the function used to analyze the data produced by the match + * finder. + */ +typedef enum { + LZMA_MODE_FAST = 1, + /**< + * \brief Fast compression + * + * Fast mode is usually at its best when combined with + * a hash chain match finder. + */ + + LZMA_MODE_NORMAL = 2 + /**< + * \brief Normal compression + * + * This is usually notably slower than fast mode. Use this + * together with binary tree match finders to expose the + * full potential of the LZMA1 or LZMA2 encoder. + */ +} lzma_mode; + + +/** + * \brief Test if given compression mode is supported + * + * Return true if the given compression mode is supported by this liblzma + * build. Otherwise false is returned. It is safe to call this with a value + * that isn't listed in lzma_mode enumeration; the return value will be false. + * + * There is no way to list which modes are available in this particular + * liblzma version and build. It would be useless, because a new compression + * mode, which the application developer wasn't aware, could require giving + * additional options to the encoder that the older modes don't need. + */ +extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Options specific to the LZMA1 and LZMA2 filters + * + * Since LZMA1 and LZMA2 share most of the code, it's simplest to share + * the options structure too. For encoding, all but the reserved variables + * need to be initialized unless specifically mentioned otherwise. + * lzma_lzma_preset() can be used to get a good starting point. + * + * For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and + * preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb. + */ +typedef struct { + /** + * \brief Dictionary size in bytes + * + * Dictionary size indicates how many bytes of the recently processed + * uncompressed data is kept in memory. One method to reduce size of + * the uncompressed data is to store distance-length pairs, which + * indicate what data to repeat from the dictionary buffer. Thus, + * the bigger the dictionary, the better the compression ratio + * usually is. + * + * Maximum size of the dictionary depends on multiple things: + * - Memory usage limit + * - Available address space (not a problem on 64-bit systems) + * - Selected match finder (encoder only) + * + * Currently the maximum dictionary size for encoding is 1.5 GiB + * (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit + * systems for certain match finder implementation reasons. In the + * future, there may be match finders that support bigger + * dictionaries. + * + * Decoder already supports dictionaries up to 4 GiB - 1 B (i.e. + * UINT32_MAX), so increasing the maximum dictionary size of the + * encoder won't cause problems for old decoders. + * + * Because extremely small dictionaries sizes would have unneeded + * overhead in the decoder, the minimum dictionary size is 4096 bytes. + * + * \note When decoding, too big dictionary does no other harm + * than wasting memory. + */ + uint32_t dict_size; +# define LZMA_DICT_SIZE_MIN UINT32_C(4096) +# define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23) + + /** + * \brief Pointer to an initial dictionary + * + * It is possible to initialize the LZ77 history window using + * a preset dictionary. It is useful when compressing many + * similar, relatively small chunks of data independently from + * each other. The preset dictionary should contain typical + * strings that occur in the files being compressed. The most + * probable strings should be near the end of the preset dictionary. + * + * This feature should be used only in special situations. For + * now, it works correctly only with raw encoding and decoding. + * Currently none of the container formats supported by + * liblzma allow preset dictionary when decoding, thus if + * you create a .xz or .lzma file with preset dictionary, it + * cannot be decoded with the regular decoder functions. In the + * future, the .xz format will likely get support for preset + * dictionary though. + */ + const uint8_t *preset_dict; + + /** + * \brief Size of the preset dictionary + * + * Specifies the size of the preset dictionary. If the size is + * bigger than dict_size, only the last dict_size bytes are + * processed. + * + * This variable is read only when preset_dict is not NULL. + * If preset_dict is not NULL but preset_dict_size is zero, + * no preset dictionary is used (identical to only setting + * preset_dict to NULL). + */ + uint32_t preset_dict_size; + + /** + * \brief Number of literal context bits + * + * How many of the highest bits of the previous uncompressed + * eight-bit byte (also known as `literal') are taken into + * account when predicting the bits of the next literal. + * + * E.g. in typical English text, an upper-case letter is + * often followed by a lower-case letter, and a lower-case + * letter is usually followed by another lower-case letter. + * In the US-ASCII character set, the highest three bits are 010 + * for upper-case letters and 011 for lower-case letters. + * When lc is at least 3, the literal coding can take advantage of + * this property in the uncompressed data. + * + * There is a limit that applies to literal context bits and literal + * position bits together: lc + lp <= 4. Without this limit the + * decoding could become very slow, which could have security related + * results in some cases like email servers doing virus scanning. + * This limit also simplifies the internal implementation in liblzma. + * + * There may be LZMA1 streams that have lc + lp > 4 (maximum possible + * lc would be 8). It is not possible to decode such streams with + * liblzma. + */ + uint32_t lc; +# define LZMA_LCLP_MIN 0 +# define LZMA_LCLP_MAX 4 +# define LZMA_LC_DEFAULT 3 + + /** + * \brief Number of literal position bits + * + * lp affects what kind of alignment in the uncompressed data is + * assumed when encoding literals. A literal is a single 8-bit byte. + * See pb below for more information about alignment. + */ + uint32_t lp; +# define LZMA_LP_DEFAULT 0 + + /** + * \brief Number of position bits + * + * pb affects what kind of alignment in the uncompressed data is + * assumed in general. The default means four-byte alignment + * (2^ pb =2^2=4), which is often a good choice when there's + * no better guess. + * + * When the aligment is known, setting pb accordingly may reduce + * the file size a little. E.g. with text files having one-byte + * alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can + * improve compression slightly. For UTF-16 text, pb=1 is a good + * choice. If the alignment is an odd number like 3 bytes, pb=0 + * might be the best choice. + * + * Even though the assumed alignment can be adjusted with pb and + * lp, LZMA1 and LZMA2 still slightly favor 16-byte alignment. + * It might be worth taking into account when designing file formats + * that are likely to be often compressed with LZMA1 or LZMA2. + */ + uint32_t pb; +# define LZMA_PB_MIN 0 +# define LZMA_PB_MAX 4 +# define LZMA_PB_DEFAULT 2 + + /** Compression mode */ + lzma_mode mode; + + /** + * \brief Nice length of a match + * + * This determines how many bytes the encoder compares from the match + * candidates when looking for the best match. Once a match of at + * least nice_len bytes long is found, the encoder stops looking for + * better candidates and encodes the match. (Naturally, if the found + * match is actually longer than nice_len, the actual length is + * encoded; it's not truncated to nice_len.) + * + * Bigger values usually increase the compression ratio and + * compression time. For most files, 32 to 128 is a good value, + * which gives very good compression ratio at good speed. + * + * The exact minimum value depends on the match finder. The maximum + * is 273, which is the maximum length of a match that LZMA1 and + * LZMA2 can encode. + */ + uint32_t nice_len; + + /** Match finder ID */ + lzma_match_finder mf; + + /** + * \brief Maximum search depth in the match finder + * + * For every input byte, match finder searches through the hash chain + * or binary tree in a loop, each iteration going one step deeper in + * the chain or tree. The searching stops if + * - a match of at least nice_len bytes long is found; + * - all match candidates from the hash chain or binary tree have + * been checked; or + * - maximum search depth is reached. + * + * Maximum search depth is needed to prevent the match finder from + * wasting too much time in case there are lots of short match + * candidates. On the other hand, stopping the search before all + * candidates have been checked can reduce compression ratio. + * + * Setting depth to zero tells liblzma to use an automatic default + * value, that depends on the selected match finder and nice_len. + * The default is in the range [4, 200] or so (it may vary between + * liblzma versions). + * + * Using a bigger depth value than the default can increase + * compression ratio in some cases. There is no strict maximum value, + * but high values (thousands or millions) should be used with care: + * the encoder could remain fast enough with typical input, but + * malicious input could cause the match finder to slow down + * dramatically, possibly creating a denial of service attack. + */ + uint32_t depth; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + uint32_t reserved_int5; + uint32_t reserved_int6; + uint32_t reserved_int7; + uint32_t reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + void *reserved_ptr1; + void *reserved_ptr2; + +} lzma_options_lzma; + + +/** + * \brief Set a compression preset to lzma_options_lzma structure + * + * 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9 + * of the xz command line tool. In addition, it is possible to bitwise-or + * flags to the preset. Currently only LZMA_PRESET_EXTREME is supported. + * The flags are defined in container.h, because the flags are used also + * with lzma_easy_encoder(). + * + * The preset values are subject to changes between liblzma versions. + * + * This function is available only if LZMA1 or LZMA2 encoder has been enabled + * when building liblzma. + * + * \return On success, false is returned. If the preset is not + * supported, true is returned. + */ +extern LZMA_API(lzma_bool) lzma_lzma_preset( + lzma_options_lzma *options, uint32_t preset) lzma_nothrow; diff --git a/third-party/libxz/include/lzma/stream_flags.h b/third-party/libxz/include/lzma/stream_flags.h new file mode 100644 index 00000000..bbdd4082 --- /dev/null +++ b/third-party/libxz/include/lzma/stream_flags.h @@ -0,0 +1,223 @@ +/** + * \file lzma/stream_flags.h + * \brief .xz Stream Header and Stream Footer encoder and decoder + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Size of Stream Header and Stream Footer + * + * Stream Header and Stream Footer have the same size and they are not + * going to change even if a newer version of the .xz file format is + * developed in future. + */ +#define LZMA_STREAM_HEADER_SIZE 12 + + +/** + * \brief Options for encoding/decoding Stream Header and Stream Footer + */ +typedef struct { + /** + * \brief Stream Flags format version + * + * To prevent API and ABI breakages if new features are needed in + * Stream Header or Stream Footer, a version number is used to + * indicate which fields in this structure are in use. For now, + * version must always be zero. With non-zero version, the + * lzma_stream_header_encode() and lzma_stream_footer_encode() + * will return LZMA_OPTIONS_ERROR. + * + * lzma_stream_header_decode() and lzma_stream_footer_decode() + * will always set this to the lowest value that supports all the + * features indicated by the Stream Flags field. The application + * must check that the version number set by the decoding functions + * is supported by the application. Otherwise it is possible that + * the application will decode the Stream incorrectly. + */ + uint32_t version; + + /** + * \brief Backward Size + * + * Backward Size must be a multiple of four bytes. In this Stream + * format version, Backward Size is the size of the Index field. + * + * Backward Size isn't actually part of the Stream Flags field, but + * it is convenient to include in this structure anyway. Backward + * Size is present only in the Stream Footer. There is no need to + * initialize backward_size when encoding Stream Header. + * + * lzma_stream_header_decode() always sets backward_size to + * LZMA_VLI_UNKNOWN so that it is convenient to use + * lzma_stream_flags_compare() when both Stream Header and Stream + * Footer have been decoded. + */ + lzma_vli backward_size; +# define LZMA_BACKWARD_SIZE_MIN 4 +# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34) + + /** + * \brief Check ID + * + * This indicates the type of the integrity check calculated from + * uncompressed data. + */ + lzma_check check; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the + * names of these variables may change. + * + * (We will never be able to use all of these since Stream Flags + * is just two bytes plus Backward Size of four bytes. But it's + * nice to have the proper types when they are needed.) + */ + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + lzma_bool reserved_bool1; + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + uint32_t reserved_int1; + uint32_t reserved_int2; + +} lzma_stream_flags; + + +/** + * \brief Encode Stream Header + * + * \param options Stream Header options to be encoded. + * options->backward_size is ignored and doesn't + * need to be initialized. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Stream Footer + * + * \param options Stream Footer options to be encoded. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Header + * + * \param options Target for the decoded Stream Header options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * options->backward_size is always set to LZMA_VLI_UNKNOWN. This is to + * help comparing Stream Flags from Stream Header and Stream Footer with + * lzma_stream_flags_compare(). + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Header. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in the header. + * + * \note When decoding .xz files that contain multiple Streams, it may + * make sense to print "file format not recognized" only if + * decoding of the Stream Header of the _first_ Stream gives + * LZMA_FORMAT_ERROR. If non-first Stream Header gives + * LZMA_FORMAT_ERROR, the message used for LZMA_DATA_ERROR is + * probably more appropriate. + * + * For example, Stream decoder in liblzma uses LZMA_DATA_ERROR if + * LZMA_FORMAT_ERROR is returned by lzma_stream_header_decode() + * when decoding non-first Stream. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Footer + * + * \param options Target for the decoded Stream Header options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Footer. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the Stream Footer + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in Stream Footer. + * + * \note If Stream Header was already decoded successfully, but + * decoding Stream Footer returns LZMA_FORMAT_ERROR, the + * application should probably report some other error message + * than "file format not recognized", since the file more likely + * is corrupt (possibly truncated). Stream decoder in liblzma + * uses LZMA_DATA_ERROR in this situation. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Compare two lzma_stream_flags structures + * + * backward_size values are compared only if both are not + * LZMA_VLI_UNKNOWN. + * + * \return - LZMA_OK: Both are equal. If either had backward_size set + * to LZMA_VLI_UNKNOWN, backward_size values were not + * compared or validated. + * - LZMA_DATA_ERROR: The structures differ. + * - LZMA_OPTIONS_ERROR: version in either structure is greater + * than the maximum supported version (currently zero). + * - LZMA_PROG_ERROR: Invalid value, e.g. invalid check or + * backward_size. + */ +extern LZMA_API(lzma_ret) lzma_stream_flags_compare( + const lzma_stream_flags *a, const lzma_stream_flags *b) + lzma_nothrow lzma_attr_pure; diff --git a/third-party/libxz/include/lzma/version.h b/third-party/libxz/include/lzma/version.h new file mode 100644 index 00000000..143c7dea --- /dev/null +++ b/third-party/libxz/include/lzma/version.h @@ -0,0 +1,121 @@ +/** + * \file lzma/version.h + * \brief Version number + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* + * Version number split into components + */ +#define LZMA_VERSION_MAJOR 5 +#define LZMA_VERSION_MINOR 2 +#define LZMA_VERSION_PATCH 4 +#define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE + +#ifndef LZMA_VERSION_COMMIT +# define LZMA_VERSION_COMMIT "" +#endif + + +/* + * Map symbolic stability levels to integers. + */ +#define LZMA_VERSION_STABILITY_ALPHA 0 +#define LZMA_VERSION_STABILITY_BETA 1 +#define LZMA_VERSION_STABILITY_STABLE 2 + + +/** + * \brief Compile-time version number + * + * The version number is of format xyyyzzzs where + * - x = major + * - yyy = minor + * - zzz = revision + * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable + * + * The same xyyyzzz triplet is never reused with different stability levels. + * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta + * or 5.1.0 stable. + * + * \note The version number of liblzma has nothing to with + * the version number of Igor Pavlov's LZMA SDK. + */ +#define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + + LZMA_VERSION_MINOR * UINT32_C(10000) \ + + LZMA_VERSION_PATCH * UINT32_C(10) \ + + LZMA_VERSION_STABILITY) + + +/* + * Macros to construct the compile-time version string + */ +#if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA +# define LZMA_VERSION_STABILITY_STRING "alpha" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA +# define LZMA_VERSION_STABILITY_STRING "beta" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE +# define LZMA_VERSION_STABILITY_STRING "" +#else +# error Incorrect LZMA_VERSION_STABILITY +#endif + +#define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ + #major "." #minor "." #patch stability commit + +#define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ + LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) + + +/** + * \brief Compile-time version as a string + * + * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable + * versions don't have any "stable" suffix). In future, a snapshot built + * from source code repository may include an additional suffix, for example + * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form + * in LZMA_VERSION macro. + */ +#define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ + LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ + LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ + LZMA_VERSION_COMMIT) + + +/* #ifndef is needed for use with windres (MinGW or Cygwin). */ +#ifndef LZMA_H_INTERNAL_RC + +/** + * \brief Run-time version number as an integer + * + * Return the value of LZMA_VERSION macro at the compile time of liblzma. + * This allows the application to compare if it was built against the same, + * older, or newer version of liblzma that is currently running. + */ +extern LZMA_API(uint32_t) lzma_version_number(void) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Run-time version as a string + * + * This function may be useful if you want to display which version of + * liblzma your application is currently using. + */ +extern LZMA_API(const char *) lzma_version_string(void) + lzma_nothrow lzma_attr_const; + +#endif diff --git a/third-party/libxz/include/lzma/vli.h b/third-party/libxz/include/lzma/vli.h new file mode 100644 index 00000000..9ad13f2e --- /dev/null +++ b/third-party/libxz/include/lzma/vli.h @@ -0,0 +1,166 @@ +/** + * \file lzma/vli.h + * \brief Variable-length integer handling + * + * In the .xz format, most integers are encoded in a variable-length + * representation, which is sometimes called little endian base-128 encoding. + * This saves space when smaller values are more likely than bigger values. + * + * The encoding scheme encodes seven bits to every byte, using minimum + * number of bytes required to represent the given value. Encodings that use + * non-minimum number of bytes are invalid, thus every integer has exactly + * one encoded representation. The maximum number of bits in a VLI is 63, + * thus the vli argument must be less than or equal to UINT64_MAX / 2. You + * should use LZMA_VLI_MAX for clarity. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Maximum supported value of a variable-length integer + */ +#define LZMA_VLI_MAX (UINT64_MAX / 2) + +/** + * \brief VLI value to denote that the value is unknown + */ +#define LZMA_VLI_UNKNOWN UINT64_MAX + +/** + * \brief Maximum supported encoded length of variable length integers + */ +#define LZMA_VLI_BYTES_MAX 9 + +/** + * \brief VLI constant suffix + */ +#define LZMA_VLI_C(n) UINT64_C(n) + + +/** + * \brief Variable-length integer type + * + * Valid VLI values are in the range [0, LZMA_VLI_MAX]. Unknown value is + * indicated with LZMA_VLI_UNKNOWN, which is the maximum value of the + * underlaying integer type. + * + * lzma_vli will be uint64_t for the foreseeable future. If a bigger size + * is needed in the future, it is guaranteed that 2 * LZMA_VLI_MAX will + * not overflow lzma_vli. This simplifies integer overflow detection. + */ +typedef uint64_t lzma_vli; + + +/** + * \brief Validate a variable-length integer + * + * This is useful to test that application has given acceptable values + * for example in the uncompressed_size and compressed_size variables. + * + * \return True if the integer is representable as VLI or if it + * indicates unknown value. + */ +#define lzma_vli_is_valid(vli) \ + ((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN) + + +/** + * \brief Encode a variable-length integer + * + * This function has two modes: single-call and multi-call. Single-call mode + * encodes the whole integer at once; it is an error if the output buffer is + * too small. Multi-call mode saves the position in *vli_pos, and thus it is + * possible to continue encoding if the buffer becomes full before the whole + * integer has been encoded. + * + * \param vli Integer to be encoded + * \param vli_pos How many VLI-encoded bytes have already been written + * out. When starting to encode a new integer in + * multi-call mode, *vli_pos must be set to zero. + * To use single-call encoding, set vli_pos to NULL. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully encoded. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to too little output space; single-call mode doesn't use + * LZMA_BUF_ERROR, since the application should have checked + * the encoded size with lzma_vli_size(). + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely written out yet. + * - LZMA_STREAM_END: Integer successfully encoded. + * - LZMA_BUF_ERROR: No output space was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_encode(lzma_vli vli, size_t *vli_pos, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Decode a variable-length integer + * + * Like lzma_vli_encode(), this function has single-call and multi-call modes. + * + * \param vli Pointer to decoded integer. The decoder will + * initialize it to zero when *vli_pos == 0, so + * application isn't required to initialize *vli. + * \param vli_pos How many bytes have already been decoded. When + * starting to decode a new integer in multi-call + * mode, *vli_pos must be initialized to zero. To + * use single-call decoding, set vli_pos to NULL. + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. This includes hitting + * the end of the input buffer before the whole integer was + * decoded; providing no input at all will use LZMA_DATA_ERROR. + * - LZMA_PROG_ERROR: Arguments are not sane. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely decoded yet. + * - LZMA_STREAM_END: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_BUF_ERROR: No input was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *vli, size_t *vli_pos, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; + + +/** + * \brief Get the number of bytes required to encode a VLI + * + * \return Number of bytes on success (1-9). If vli isn't valid, + * zero is returned. + */ +extern LZMA_API(uint32_t) lzma_vli_size(lzma_vli vli) + lzma_nothrow lzma_attr_pure; diff --git a/third-party/libxz/lib/liblzma.5.dylib b/third-party/libxz/lib/liblzma.5.dylib new file mode 100755 index 00000000..d46584b3 --- /dev/null +++ b/third-party/libxz/lib/liblzma.5.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9518711d5bd0576f22208d70bb5f0d5f89298224c519afa5c3fd1843190e99ef +size 224916 diff --git a/third-party/libxz/lib/liblzma.a b/third-party/libxz/lib/liblzma.a new file mode 100644 index 00000000..ee58542e --- /dev/null +++ b/third-party/libxz/lib/liblzma.a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4543a29d523b6bd3d15dc3c0020b5d63779a7f6aca04f1d98731167c8bfe88f +size 326552 diff --git a/third-party/libxz/lib/liblzma.dylib b/third-party/libxz/lib/liblzma.dylib new file mode 120000 index 00000000..ef65a987 --- /dev/null +++ b/third-party/libxz/lib/liblzma.dylib @@ -0,0 +1 @@ +liblzma.5.dylib \ No newline at end of file diff --git a/third-party/libxz/lib/liblzma.la b/third-party/libxz/lib/liblzma.la new file mode 100755 index 00000000..34cb946c --- /dev/null +++ b/third-party/libxz/lib/liblzma.la @@ -0,0 +1,41 @@ +# liblzma.la - a libtool library file +# Generated by libtool (GNU libtool) 2.4.6.40-6ca5-dirty +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='liblzma.5.dylib' + +# Names of this library. +library_names='liblzma.5.dylib liblzma.dylib' + +# The name of the static archive. +old_library='liblzma.a' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags=' -pthread' + +# Libraries that this one depends upon. +dependency_libs='' + +# Names of additional weak libraries provided by this library +weak_library_names='' + +# Version information for liblzma. +current=7 +age=2 +revision=4 + +# Is this an already installed library? +installed=yes + +# Should we warn about portability when linking against -modules? +shouldnotlink=no + +# Files to dlopen/dlpreopen +dlopen='' +dlpreopen='' + +# Directory that this library needs to be installed in: +libdir='/Users/hat/vimr/.deps/xz/../../third-party/libxz/lib' diff --git a/third-party/libxz/lib/pkgconfig/liblzma.pc b/third-party/libxz/lib/pkgconfig/liblzma.pc new file mode 100644 index 00000000..2a54efc8 --- /dev/null +++ b/third-party/libxz/lib/pkgconfig/liblzma.pc @@ -0,0 +1,19 @@ +# +# Author: Lasse Collin +# +# This file has been put into the public domain. +# You can do whatever you want with this file. +# + +prefix=/Users/hat/vimr/.deps/xz/../../third-party/libxz +exec_prefix=/Users/hat/vimr/.deps/xz/../../third-party/libxz +libdir=/Users/hat/vimr/.deps/xz/../../third-party/libxz/lib +includedir=/Users/hat/vimr/.deps/xz/../../third-party/libxz/include + +Name: liblzma +Description: General purpose data compression library +URL: https://tukaani.org/xz/ +Version: 5.2.4 +Cflags: -I${includedir} +Libs: -L${libdir} -llzma +Libs.private: -D_THREAD_SAFE -pthread